“71e046b0ba6a46fa72bd50dd847046e99a3ca0fe”上不存在“examples/ljspeech/tts0/path.sh”
提交 9b0d2062 编写于 作者: S sunyanfang01

fix the conflicts

...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$ exclude: (?!.*third_party)^.*$
- repo: local
hooks: hooks:
- id: clang-format-with-version-check - id: clang-format-with-version-check
name: clang-format name: clang-format
...@@ -31,10 +32,11 @@ ...@@ -31,10 +32,11 @@
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: local
hooks: hooks:
- id: cpplint-cpp-source - id: cpplint-cpp-source
name: cpplint name: cpplint
description: Check C++ code style using cpplint.py. description: Check C++ code style using cpplint.py.
entry: bash cpplint_pre_commit.hook entry: bash ./tools/codestyle/cpplint_pre_commit.hook
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
...@@ -66,7 +66,7 @@ int main(int argc, char** argv) { ...@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
std::cout << "image file: " << image_path std::cout << "image file: " << image_path
<< ", predict label: " << result.boxes[i].category << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id << ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:(" << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", " << result.boxes[i].coordinate[2] << ", "
...@@ -89,7 +89,7 @@ int main(int argc, char** argv) { ...@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
for (int i = 0; i < result.boxes.size(); ++i) { for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << ", predict label: " << result.boxes[i].category std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id << ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box:(" << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", " << result.boxes[i].coordinate[2] << ", "
......
...@@ -63,9 +63,10 @@ class SegResult : public BaseResult { ...@@ -63,9 +63,10 @@ class SegResult : public BaseResult {
public: public:
Mask<int64_t> label_map; Mask<int64_t> label_map;
Mask<float> score_map; Mask<float> score_map;
std::string type = "seg";
void clear() { void clear() {
label_map.clear(); label_map.clear();
score_map.clear(); score_map.clear();
} }
}; };
} // namespce of PaddleX } // namespace PaddleX
...@@ -83,7 +83,7 @@ class ResizeByShort : public Transform { ...@@ -83,7 +83,7 @@ class ResizeByShort : public Transform {
} else { } else {
max_size_ = -1; max_size_ = -1;
} }
}; }
virtual bool Run(cv::Mat* im, ImageBlob* data); virtual bool Run(cv::Mat* im, ImageBlob* data);
private: private:
...@@ -96,7 +96,7 @@ class ResizeByLong : public Transform { ...@@ -96,7 +96,7 @@ class ResizeByLong : public Transform {
public: public:
virtual void Init(const YAML::Node& item) { virtual void Init(const YAML::Node& item) {
long_size_ = item["long_size"].as<int>(); long_size_ = item["long_size"].as<int>();
}; }
virtual bool Run(cv::Mat* im, ImageBlob* data); virtual bool Run(cv::Mat* im, ImageBlob* data);
private: private:
...@@ -167,9 +167,6 @@ class Padding : public Transform { ...@@ -167,9 +167,6 @@ class Padding : public Transform {
height_ = item["target_size"].as<std::vector<int>>()[1]; height_ = item["target_size"].as<std::vector<int>>()[1];
} }
} }
if (item["im_padding_value"].IsDefined()) {
value_ = item["im_padding_value"].as<std::vector<float>>();
}
} }
virtual bool Run(cv::Mat* im, ImageBlob* data); virtual bool Run(cv::Mat* im, ImageBlob* data);
...@@ -177,7 +174,6 @@ class Padding : public Transform { ...@@ -177,7 +174,6 @@ class Padding : public Transform {
int coarsest_stride_ = -1; int coarsest_stride_ = -1;
int width_ = 0; int width_ = 0;
int height_ = 0; int height_ = 0;
std::vector<float> value_;
}; };
class Transforms { class Transforms {
......
...@@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) { ...@@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) {
YAML::Node config = YAML::LoadFile(yaml_file); YAML::Node config = YAML::LoadFile(yaml_file);
type = config["_Attributes"]["model_type"].as<std::string>(); type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>(); name = config["Model"].as<std::string>();
std::string version = config["version"].as<std::string>();
if (version[0] == '0') {
std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment "
<< "cannot be done, please refer to "
<< "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md "
<< "to transfer version."
<< std::endl;
return false;
}
bool to_rgb = true; bool to_rgb = true;
if (config["TransformsMode"].IsDefined()) { if (config["TransformsMode"].IsDefined()) {
std::string mode = config["TransformsMode"].as<std::string>(); std::string mode = config["TransformsMode"].as<std::string>();
...@@ -89,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) { ...@@ -89,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) {
bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) { bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
cv::Mat im = input_im.clone(); cv::Mat im = input_im.clone();
if (!transforms_.Run(&im, &inputs_)) { if (!transforms_.Run(&im, blob)) {
return false; return false;
} }
return true; return true;
......
...@@ -19,30 +19,30 @@ import argparse ...@@ -19,30 +19,30 @@ import argparse
def export_lite(): def export_lite():
opt = lite.Opt() opt = lite.Opt()
model_file = os.path.join(FLAGS.model_path, '__model__') model_file = os.path.join(FLAGS.model_dir, '__model__')
params_file = os.path.join(FLAGS.model_path, '__params__') params_file = os.path.join(FLAGS.model_dir, '__params__')
opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_dir) opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
"--model_path", "--model_dir",
type=str, type=str,
default="", default="",
help="model path.", help="path of '__model__' and '__params__'.",
required=True) required=True)
parser.add_argument( parser.add_argument(
"--place", "--place",
type=str, type=str,
default="arm", default="arm",
help="preprocess config path.", help="run place: 'arm|opencl|x86|npu|xpu|rknpu|apu'.",
required=True) required=True)
parser.add_argument( parser.add_argument(
"--save_dir", "--save_file",
type=str, type=str,
default="paddlex.onnx", default="paddlex.onnx",
help="Directory for storing the output visualization files.", help="file name for storing the output files.",
required=True) required=True)
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
export_lite() export_lite()
...@@ -60,3 +60,9 @@ ...@@ -60,3 +60,9 @@
## 11. 每次训练新的模型,都需要重新下载预训练模型,怎样可以下载一次就搞定 ## 11. 每次训练新的模型,都需要重新下载预训练模型,怎样可以下载一次就搞定
> 1.可以按照9的方式来解决这个问题 > 1.可以按照9的方式来解决这个问题
> 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载 > 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载
## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决?
> 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。
> 2. 如果您的系统是Windows 7或者Windows Server 2012,原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->“管理”打开服务器管理器,点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”,等安装完成后尝试再次运行PaddleX。
> 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中,如有请退出或者重启机器看是否解决。
> 4. 请确认运行程序的用户是否有管理员权限,如非管理员权限用户请尝试使用管理员身份运行看是否成功。
\ No newline at end of file
...@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, ...@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md) > 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md)
> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29) > 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)
> **参数** > **参数**
...@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, ...@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## CocoDetection类 ## CocoDetection类
``` ```
...@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers= ...@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## EasyDataDet类 ## EasyDataDet类
``` ```
...@@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n ...@@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
> > * **num_workers** (int|str):数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。 > > * **num_workers** (int|str):数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。
> > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。 > > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
# Predictor部署-paddlex.deploy # 预测部署-paddlex.deploy
使用AnalysisPredictor进行预测部署。 使用Paddle Inference进行高性能的Python预测部署。更多关于Paddle Inference信息请参考[Paddle Inference文档](https://paddle-inference.readthedocs.io/en/latest/#)
## Predictor类 ## Predictor类
...@@ -22,6 +22,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_ ...@@ -22,6 +22,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_
> > > >
> > ``` > > ```
> > import paddlex > > import paddlex
> >
> > model = paddlex.deploy.Predictor(model_dir, use_gpu=True) > > model = paddlex.deploy.Predictor(model_dir, use_gpu=True)
> > result = model.predict(image_file) > > result = model.predict(image_file)
> > ``` > > ```
......
...@@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000) ...@@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000)
### train 训练接口 ### train 训练接口
```python ```python
train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None) train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None)
``` ```
> >
> **参数** > **参数**
...@@ -30,12 +30,14 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s ...@@ -30,12 +30,14 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.025。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.025。
> > - **warmup_steps** (int): 默认优化器的warmup步数,学习率将在设定的步数内,从warmup_start_lr线性增长至设定的learning_rate,默认为0。
> > - **warmup_start_lr** (float): 默认优化器的warmup起始学习率,默认为0.0。
> > - **lr_decay_epochs** (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 > > - **lr_decay_epochs** (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。
> > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。 > > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
...@@ -186,3 +188,7 @@ paddlex.cls.DenseNet161(num_classes=1000) ...@@ -186,3 +188,7 @@ paddlex.cls.DenseNet161(num_classes=1000)
paddlex.cls.DenseNet201(num_classes=1000) paddlex.cls.DenseNet201(num_classes=1000)
``` ```
### HRNet_W18
```python
paddlex.cls.HRNet_W18(num_classes=1000)
```
...@@ -9,7 +9,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_ ...@@ -9,7 +9,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_
> 构建YOLOv3检测器。**注意在YOLOv3,num_classes不需要包含背景类,如目标包括human、dog两种,则num_classes设为2即可,这里与FasterRCNN/MaskRCNN有差别** > 构建YOLOv3检测器。**注意在YOLOv3,num_classes不需要包含背景类,如目标包括human、dog两种,则num_classes设为2即可,这里与FasterRCNN/MaskRCNN有差别**
> **参数** > **参数**
> >
> > - **num_classes** (int): 类别数。默认为80。 > > - **num_classes** (int): 类别数。默认为80。
> > - **backbone** (str): YOLOv3的backbone网络,取值范围为['DarkNet53', 'ResNet34', 'MobileNetV1', 'MobileNetV3_large']。默认为'MobileNetV1'。 > > - **backbone** (str): YOLOv3的backbone网络,取值范围为['DarkNet53', 'ResNet34', 'MobileNetV1', 'MobileNetV3_large']。默认为'MobileNetV1'。
> > - **anchors** (list|tuple): anchor框的宽度和高度,为None时表示使用默认值 > > - **anchors** (list|tuple): anchor框的宽度和高度,为None时表示使用默认值
...@@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa ...@@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。 > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。 > > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。
...@@ -53,7 +53,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa ...@@ -53,7 +53,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
...@@ -107,7 +107,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec ...@@ -107,7 +107,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec
> **参数** > **参数**
> > - **num_classes** (int): 包含了背景类的类别数。默认为81。 > > - **num_classes** (int): 包含了背景类的类别数。默认为81。
> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 > > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
> > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。
> > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
> > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
...@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa ...@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18的COCO预训练模型);为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
......
...@@ -12,7 +12,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_ ...@@ -12,7 +12,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_
> **参数** > **参数**
> > - **num_classes** (int): 包含了背景类的类别数。默认为81。 > > - **num_classes** (int): 包含了背景类的类别数。默认为81。
> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 > > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
> > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。
> > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
> > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
...@@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa ...@@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18和HRNet_W18的COCO预训练模型);若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
...@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None) ...@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None)
> >
> **返回值** > **返回值**
> >
> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高 > > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景
...@@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride ...@@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
> **参数** > **参数**
> > - **num_classes** (int): 类别数。 > > - **num_classes** (int): 类别数。
> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],'MobileNetV2_x1.0'。 > > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],默认值为'MobileNetV2_x1.0'。
> > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。 > > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。
> > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 > > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
> > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 > > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
...@@ -40,14 +40,14 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev ...@@ -40,14 +40,14 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output' > > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型);若为字符串'CITYSCAPES',则自动下载在CITYSCAPES数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
...@@ -124,12 +124,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev ...@@ -124,12 +124,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output' > > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。 > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
...@@ -173,3 +173,173 @@ predict(self, im_file, transforms=None): ...@@ -173,3 +173,173 @@ predict(self, im_file, transforms=None):
> **返回值** > **返回值**
> > > >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 > > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
## HRNet类
```python
paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255)
```
> 构建HRNet分割器。
> **参数**
> > - **num_classes** (int): 类别数。
> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, class_weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
> HRNet模型训练接口。
> **参数**
> >
> > - **num_epochs** (int): 训练迭代轮数。
> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
#### evaluate 评估接口
```
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
```
> HRNet模型评估接口。
> **参数**
> >
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **batch_size** (int): 评估时的batch大小。默认1。
> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
> > - **return_details** (bool): 是否返回详细信息。默认False。
> **返回值**
> >
> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
#### predict 预测接口
```
predict(self, im_file, transforms=None):
```
> HRNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`HRNet.test_transforms`和`HRNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
> > - **im_file** (str): 预测图像路径。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
## FastSCNN类
```python
paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0])
```
> 构建FastSCNN分割器。
> **参数**
> > - **num_classes** (int): 类别数。
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, class_weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
> FastSCNN模型训练接口。
> **参数**
> >
> > - **num_epochs** (int): 训练迭代轮数。
> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
#### evaluate 评估接口
```
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
```
> FastSCNN模型评估接口。
> **参数**
> >
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **batch_size** (int): 评估时的batch大小。默认1。
> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
> > - **return_details** (bool): 是否返回详细信息。默认False。
> **返回值**
> >
> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
#### predict 预测接口
```
predict(self, im_file, transforms=None):
```
> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`FastSCNN.test_transforms`和`FastSCNN.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
> > - **im_file** (str): 预测图像路径。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
...@@ -9,8 +9,8 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 ...@@ -9,8 +9,8 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了
| 任务类型 | 增强方法 | | 任务类型 | 增强方法 |
| :------- | :------------| | :------- | :------------|
| 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)[RandomVerticalFlip](cls_transforms.html#randomverticalflip)<br> [RandomRotate](cls_transforms.html#randomratate)[RandomDistort](cls_transforms.html#randomdistort) | | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)[RandomVerticalFlip](cls_transforms.html#randomverticalflip)<br> [RandomRotate](cls_transforms.html#randomratate)[RandomDistort](cls_transforms.html#randomdistort) |
|目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)RandomExpand](det_transforms.html#randomexpand) | |目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)[RandomDistort](det_transforms.html#randomdistort)[RandomCrop](det_transforms.html#randomcrop)<br> [MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)[RandomExpand](det_transforms.html#randomexpand) |
|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotation](seg_transforms.html#randomrotation)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) | |语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)[RandomVerticalFlip](seg_transforms.html#randomverticalflip)[RandomRangeScaling](seg_transforms.html#randomrangescaling)<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)[RandomBlur](seg_transforms.html#randomblur)<br> [RandomRotate](seg_transforms.html#randomrotate)[RandomScaleAspect](seg_transforms.html#randomscaleaspect)[RandomDistort](seg_transforms.html#randomdistort) |
## imgaug增强库的支持 ## imgaug增强库的支持
......
...@@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms) ...@@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms)
## RandomCrop类 ## RandomCrop类
```python ```python
paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3. / 4, upper_ratio=4. / 3) paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3)
``` ```
对图像进行随机剪裁,模型训练时的数据增强操作。 对图像进行随机剪裁,模型训练时的数据增强操作。
...@@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3 ...@@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3
### 参数 ### 参数
* **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。 * **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。
* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。 * **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。
* **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。 * **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。
* **upper_ratio** (float): 宽变换比例的最大限制。默认为4. / 3。 * **upper_ratio** (float): 宽变换比例的最大限制。默认为4. / 3。
...@@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5, ...@@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5,
* **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。 * **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。
* **hue_range** (int): 色调因子的范围。默认为18。 * **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。 * **hue_prob** (float): 随机调整色调的概率。默认为0.5。
## ComposedClsTransforms类
```python
paddlex.cls.transforms.ComposedClsTransforms(mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
分类模型中已经组合好的数据处理流程,开发者可以直接使用ComposedClsTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomCrop](#RandomCrop)和[RandomHorizontalFlip](#RandomHorizontalFlip)两种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedClsTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机从图像中crop一块子图,并resize成crop_size大小
> > 2. 将1的输出按0.5的概率随机进行水平翻转
> > 3. 将图像进行归一化
> 验证/预测阶段:
> > 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14
> > 2. 从图像中心crop出一个大小为crop_size的图像
> > 3. 将图像进行归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`'train'`、`'eval'`或`'test'`。
* **crop_size** (int|list): 输入到模型里的图像大小,默认为[224, 224](与原图大小无关,根据上述几个步骤,会将原图处理成该图大小输入给模型训练)
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像标准差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedClsTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.cls import transforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[320, 320])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[320, 320])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.cls import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面两个为通过add_augmenters额外添加的数据增强方式
transforms.RandomCrop(crop_size=320),
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.ResizeByShort(short_size=int(320*1.14)),
transforms.CenterCrop(crop_size=320),
transforms.Normalize()
])
```
...@@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3, ...@@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3,
* **num_attempts** (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。 * **num_attempts** (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。
* **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。 * **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。
* **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。 * **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。
## ComposedRCNNTransforms类
```python
paddlex.det.transforms.ComposedRCNNTransforms(mode, min_max_size=[800, 1333], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
目标检测FasterRCNN和实例分割MaskRCNN模型中已经组合好的数据处理流程,开发者可以直接使用ComposedRCNNTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedRCNNTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机以0.5的概率将图像水平翻转
> > 2. 将图像进行归一化
> > 3. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩放
> > 4. 使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数
> 验证/预测阶段:
> > 1. 将图像进行归一化
> > 2. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩放
> > 3. 使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数
### 参数
* **mode** (str): Transforms所处的阶段,包括`'train'`、`'eval'`或`'test'`。
* **min_max_size** (list): 输入模型中图像的最短边长度和最长边长度,参考[ResizeByShort](#ResizeByShort)(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练),默认[800, 1333]
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像标准差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedRCNNTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面两个为通过add_augmenters额外添加的数据增强方式
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize(),
transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
eval_transforms = transforms.Composed([
transforms.Normalize(),
transforms.ResizeByShort(short_size=800, max_size=1333),
transforms.Padding(coarsest_stride=32)
])
```
## ComposedYOLOv3Transforms类
```python
paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
目标检测YOLOv3模型中已经组合好的数据处理流程,开发者可以直接使用ComposedYOLOv3Transforms,简化手动组合transforms的过程, 该类中已经包含了[MixupImage](#MixupImage)、[RandomDistort](#RandomDistort)、[RandomExpand](#RandomExpand)、[RandomCrop](#RandomCrop)、[RandomHorizontalFlip](#RandomHorizontalFlip)5种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedYOLOv3Transforms共包括以下几个步骤:
> 训练阶段:
> > 1. 在前mixup_epoch轮迭代中,使用MixupImage策略
> > 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调
> > 3. 随机扩充图像
> > 4. 随机裁剪图像
> > 5. 将4步骤的输出图像Resize成shape参数的大小
> > 6. 随机0.5的概率水平翻转图像
> > 7. 图像归一化
> 验证/预测阶段:
> > 1. 将图像Resize成shape参数大小
> > 2. 图像归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`'train'`、`'eval'`或`'test'`。
* **shape** (list): 输入模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[608, 608]
* **mixup_epoch**(int): 模型训练过程中,在前mixup_epoch轮迭代中,使用mixup策略,如果为-1,则不使用mixup策略, 默认250。
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像标准差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedYOLOv3Transforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480])
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面为通过add_augmenters额外添加的数据增强方式
transforms.MixupImage(mixup_epoch=250),
transforms.RandomDistort(),
transforms.RandomExpand(),
transforms.RandomCrop(),
transforms.Resize(target_size=480, interp='RANDOM'),
transforms.RandomHorizontalFlip(prob=0.5),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.Resize(target_size=480, interp='CUBIC'),
transforms.Normalize()
])
```
...@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1) ...@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
* **prob** (float): 图像模糊概率。默认为0.1。 * **prob** (float): 图像模糊概率。默认为0.1。
## RandomRotation ## RandomRotate
```python ```python
paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255) paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
``` ```
...@@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5, ...@@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5,
* **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。 * **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。
* **hue_range** (int): 色调因子的范围。默认为18。 * **hue_range** (int): 色调因子的范围。默认为18。
* **hue_prob** (float): 随机调整色调的概率。默认为0.5。 * **hue_prob** (float): 随机调整色调的概率。默认为0.5。
## ComposedSegTransforms类
```python
paddlex.seg.transforms.ComposedSegTransforms(mode, train_crop_size=[769, 769], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
```
语义分割DeepLab和UNet模型中已经组合好的数据处理流程,开发者可以直接使用ComposedSegTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)、[ResizeStepScaling](#ResizeStepScaling)、[RandomPaddingCrop](#RandomPaddingCrop)3种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。
ComposedSegTransforms共包括以下几个步骤:
> 训练阶段:
> > 1. 随机对图像以0.5的概率水平翻转
> > 2. 按不同的比例随机Resize原图
> > 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
> > 4. 图像归一化
> 预测阶段:
> > 1. 图像归一化
### 参数
* **mode** (str): Transforms所处的阶段,包括`'train'`、`'eval'`或`'test'`。
* **train_crop_size** (list): 训练过程中随机Crop和Resize后(验证或预测过程中不需配置该参数,自动使用原图大小),输入到模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[769, 769]
* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。
* **std** (list): 图像标准差,默认为[0.229, 0.224, 0.225]。
### 添加数据增强方式
```python
ComposedSegTransforms.add_augmenters(augmenters)
```
> **参数**
> * **augmenters**(list): 数据增强方式列表
#### 使用示例
```
import paddlex as pdx
from paddlex.seg import transforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 添加数据增强
import imgaug.augmenters as iaa
train_transforms.add_augmenters([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0))
])
```
上面代码等价于
```
import paddlex as pdx
from paddlex.det import transforms
train_transforms = transforms.Composed([
transforms.RandomDistort(),
iaa.blur.GaussianBlur(sigma=(0.0, 3.0)),
# 上面2行为通过add_augmenters额外添加的数据增强方式
transforms.RandomHorizontalFlip(prob=0.5),
transforms.ResizeStepScaling(),
transforms.PaddingCrop(crop_size=[512, 512]),
transforms.Normalize()
])
eval_transforms = transforms.Composed([
transforms.Normalize()
])
```
...@@ -6,47 +6,56 @@ ...@@ -6,47 +6,56 @@
| 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) | | 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) |
| :----| :------- | :----------- | :--------- | :--------- | | :----| :------- | :----------- | :--------- | :--------- |
| ResNet18| 46.9MB | 1.499 | 71.0 | 89.9 | | [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 |
| ResNet34| 87.5MB | 2.272 | 74.6 | 92.1 | | [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 |
| ResNet50| 102.7MB | 2.939 | 76.5 | 93.0 | | [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 |
| ResNet101 |179.1MB | 5.314 | 77.6 | 93.6 | | [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 |
| ResNet50_vd |102.8MB | 3.165 | 79.1 | 94.4 | | [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 |
| ResNet101_vd| 179.2MB | 5.252 | 80.2 | 95.0 | | [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 |
| ResNet50_vd_ssld |102.8MB | 3.165 | 82.4 | 96.1 | | [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 |
| ResNet101_vd_ssld| 179.2MB | 5.252 | 83.7 | 96.7 | | [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 |
| DarkNet53|166.9MB | 3.139 | 78.0 | 94.1 | | [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 |
| MobileNetV1 | 16.0MB | 32.523 | 71.0 | 89.7 | | [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 |
| MobileNetV2 | 14.0MB | 23.318 | 72.2 | 90.7 | | [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 |
| MobileNetV3_large| 21.0MB | 19.308 | 75.3 | 93.2 | | [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 |
| MobileNetV3_small | 12.0MB | 6.546 | 68.2 | 88.1 | | [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 |
| MobileNetV3_large_ssld| 21.0MB | 19.308 | 79.0 | 94.5 | | [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 |
| MobileNetV3_small_ssld | 12.0MB | 6.546 | 71.3 | 90.1 | | [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 |
| Xception41 |92.4MB | 4.408 | 79.6 | 94.4 | | [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 |
| Xception65 | 144.6MB | 6.464 | 80.3 | 94.5 | | [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 |
| DenseNet121 | 32.8MB | 4.371 | 75.7 | 92.6 | | [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 |
| DenseNet161|116.3MB | 8.863 | 78.6 | 94.1 | | [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 |
| DenseNet201| 84.6MB | 8.173 | 77.6 | 93.7 | | [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 |
| ShuffleNetV2 | 9.0MB | 10.941 | 68.8 | 88.5 | | [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 |
| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 |
## 目标检测模型 ## 目标检测模型
> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。 > 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) | | 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) |
|:-------|:-----------|:-------------|:----------| |:-------|:-----------|:-------------|:----------|
|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 | |[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 | |[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 | |[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 | |[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 |
|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 | |[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 |
|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 | |[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 156.097 | 38.7 |
|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 | |[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 119.788 | 40.5 |
|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 | |[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 |
|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 | |[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 |
|YOLOv3-MobileNetv3|94.6MB | - | 31.6 | |[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 |
| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 | |[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 |
| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 |
## 实例分割模型 ## 实例分割模型
> 表中模型相关指标均为在MSCOCO数据集上测试得到。 > 表中模型相关指标均为在MSCOCO数据集上测试得到。
| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
|:-------|:-----------|:-------------|:----------|
|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
|DeepLabv3+-Xception41|-| - | - |
|DeepLabv3+-Xception65|-| - | - |
|UNet|-| - | - |
|HRNet_w18|-| - | - |
...@@ -23,3 +23,35 @@ Batch Size指模型在训练过程中,一次性处理的样本数量, 如若 ...@@ -23,3 +23,35 @@ Batch Size指模型在训练过程中,一次性处理的样本数量, 如若
- [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train) - [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
- [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train) - [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
- [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2) - [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
## 关于lr_decay_epoch, warmup_steps等参数的说明
在PaddleX或其它深度学习模型的训练过程中,经常见到lr_decay_epoch, warmup_steps, warmup_start_lr等参数设置,下面介绍一下这些参数的作用。
首先这些参数都是用于控制模型训练过程中学习率的变化方式,例如我们在训练时将learning_rate设为0.1, 通常情况,在模型的训练过程中,学习率一直以0.1不变训练下去, 但为了调出更好的模型效果,我们往往不希望学习率一直保持不变。
### warmup_steps和warmup_start_lr
我们在训练模型时,一般都会使用预训练模型,例如检测模型在训练时使用backbone在ImageNet数据集上的预训练权重。但由于在自行训练时,自己的数据与ImageNet数据集存在较大的差异,可能会一开始由于梯度过大使得训练出现问题,因此可以在刚开始训练时,让学习率以一个较小的值,慢慢增长到设定的学习率。`warmup_steps`和`warmup_start_lr`就是这个作用,模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`内线性增长到设定的学习率。
### lr_decay_epochs和lr_decay_gamma
`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次衰减后的学习率为之前的学习率 * `lr_decay_gamma`。
### Notice
在PaddleX中,限制warmup需要在第一个学习率decay衰减前结束,因此要满足下面的公式
```
warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
```
其中公式中`num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`
> 因此如若在训练时PaddleX提示`warmup_steps should be less than xxx`时,即可根据上述公式来调整你的`lr_decay_epochs`或者是`warmup_steps`使得两个参数满足上面的条件
> - 图像分类模型 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
> - FasterRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
> - YOLOv3 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
> - MaskRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
> - DeepLab [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p)
> - UNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet)
> - HRNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet)
...@@ -26,13 +26,14 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习 ...@@ -26,13 +26,14 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习
cv_solutions.md cv_solutions.md
apis/index.rst apis/index.rst
paddlex_gui/index.rst paddlex_gui/index.rst
tuning_strategy/index.rst
update.md update.md
FAQ.md FAQ.md
appendix/index.rst appendix/index.rst
* PaddleX版本: v1.0.0 * PaddleX版本: v1.0.0
* 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex * 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex
* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop * 项目GitHub: https://github.com/PaddlePaddle/PaddleX
* 官方QQ用户群: 1045148026 * 官方QQ用户群: 1045148026
* GitHub Issue反馈: http://www.github.com/PaddlePaddle/PaddleX/issues * GitHub Issue反馈: http://www.github.com/PaddlePaddle/PaddleX/issues
# PaddleX GUI下载安装 ## <a name="2">PaddleX GUI安装</a>
PaddleX GUI是提升项目开发效率的核心模块,开发者可快速完成深度学习模型全流程开发。我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddle/paddleX)下载试用PaddleX GUI可视化前端,并获得您宝贵的意见或开源项目贡献。
#### <a name="1">安装推荐环境</a>
* **操作系统**
* Windows7/8/10(推荐Windows 10);
* Mac OS 10.13+;
* Ubuntu 18.04+;
***注:处理器需为x86_64架构,支持MKL。***
* **训练硬件**
* **GPU**(仅Windows及Linux系统):
推荐使用支持CUDA的NVIDIA显卡,例如:GTX 1070及以上性能的显卡;
Windows系统X86_64驱动版本>=411.31;
Linux系统X86_64驱动版本>=410.48;
显存8G以上;
* **CPU**
PaddleX当前支持您用本地CPU进行训练,但推荐使用GPU以获得更好的开发体验。
* **内存**:建议8G以上
* **硬盘空间**:建议SSD剩余空间1T以上(非必须)
***注:PaddleX在Windows及Mac OS系统只支持单卡模式。Windows系统暂不支持NCCL。***
# PaddleX GUI如何训练模型 # PaddleX GUI使用文档
飞桨全流程开发工具,集飞桨核心框架、模型库、工具及组件等深度学习开发全流程所需能力于一身,易用易集成,是开发者快速入门深度学习、提升深度学习项目开发效率的最佳辅助工具。
PaddleX GUI是应用PaddleX实现的一个图形化开发客户端产品,它使得开发者通过键入式输入即可完成深度学习模型全流程开发,可大幅度提升项目开发效率。飞桨团队期待各位开发者基于PaddleX,实现出各种符合自己产业实际需求的产品。
我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddlex)下载试用PaddleX GUI,并获得您宝贵的意见或开源项目贡献。
## 目录
* **产品特性**
* **PaddleX GUI可视化前端**
* **FAQ**
## 产品特性
\- **全流程打通**
将深度学习开发全流程打通,并提供可视化开发界面, 省去了对各环节API的熟悉过程及重复的代码开发,极大地提升了开发效率。
\- **易用易集成**
提供功能最全、最灵活的Python API开发模式,完全开源开放,易于集成和二次开发。键入式输入的图形化开发界面,使得非专业算法人员也可快速进行业务POC。
\- **融合产业实践经验**
融合飞桨产业落地经验,精选高质量的视觉模型方案,开放实际的案例教学,手把手带您实现产业需求落地。
\- **教程与服务**
从数据集准备到上线部署,为您提供业务开发全流程的文档说明及技术服务。开发者可以通过QQ群、微信群、GitHub社区等多种形式与飞桨团队及同业合作伙伴交流沟通。
## PaddleX GUI 可视化前端
**第一步:准备数据**
在开始模型训练前,您需要根据不同的任务类型,将数据标注为相应的格式。目前PaddleX支持【图像分类】、【目标检测】、【语义分割】、【实例分割】四种任务类型。不同类型任务的数据处理方式可查看[数据标注方式](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html)
**第二步:导入我的数据集**
①数据标注完成后,您需要根据不同的任务,将数据和标注文件,按照客户端提示更名并保存到正确的文件中。
②在客户端新建数据集,选择与数据集匹配的任务类型,并选择数据集对应的路径,将数据集导入。
![](images/datasets1.jpg)
③选定导入数据集后,客户端会自动校验数据及标注文件是否合规,校验成功后,您可根据实际需求,将数据集按比例划分为训练集、验证集、测试集。
④您可在「数据分析」模块按规则预览您标注的数据集,双击单张图片可放大查看。
![](images/dataset2.jpg)
**第三步:创建项目**
① 在完成数据导入后,您可以点击「新建项目」创建一个项目。
② 您可根据实际任务需求选择项目的任务类型,需要注意项目所采用的数据集也带有任务类型属性,两者需要进行匹配。
![](images/project3.jpg)
**第四步:项目开发**
**数据选择**:项目创建完成后,您需要选择已载入客户端并校验后的数据集,并点击下一步,进入参数配置页面。
![](images/project1.jpg)
**参数配置**:主要分为**模型参数**、**训练参数**、**优化策略**三部分。您可根据实际需求选择模型结构、骨架网络及对应的训练参数、优化策略,使得任务效果最佳。
![](images/project2.jpg)
参数配置完成后,点击启动训练,模型开始训练并进行效果评估。
**训练可视化**:在训练过程中,您可通过VisualDL查看模型训练过程参数变化、日志详情,及当前最优的训练集和验证集训练指标。训练过程中,您可通过点击"中止训练"随时中止训练。
![](images/visualization1.jpg)
模型训练结束后,可选择进入『模型剪裁分析』或者直接进入『模型评估』。
![](images/visualization2.jpg)
**模型裁剪**:如果开发者希望减少模型的体积、计算量,提升模型在设备上的预测性能,可以采用PaddleX提供的模型裁剪策略。裁剪过程将对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪,再进行精调训练获得最终裁剪后的模型。
![](images/visualization3.jpg)
**模型评估**:在模型评估页面,您可查看训练后的模型效果。评估方法包括混淆矩阵、精度、召回率等。
![](images/visualization4.jpg)
您还可以选择『数据集切分』时留出的『测试数据集』或从本地文件夹中导入一张/多张图片,将训练后的模型进行测试。根据测试结果,您可决定是否将训练完成的模型保存为预训练模型并进入模型发布页面,或返回先前步骤调整参数配置重新进行训练。
![](images/visualization5.jpg)
**第五步:模型发布**
当模型效果满意后,您可根据实际的生产环境需求,选择将模型发布为需要的版本。
![](images/publish.jpg)
## FAQ
1. **为什么训练速度这么慢?**
PaddleX完全采用您本地的硬件进行计算,深度学习任务确实对算力要求较高,为了使您能快速体验应用PaddleX进行开发,我们适配了CPU硬件,但强烈建议您使用GPU以提升训练速度和开发体验。
2. **我可以在服务器或云平台上部署PaddleX么?**
PaddleX GUI是一个适配本地单机安装的客户端,无法在服务器上直接进行部署,您可以直接使用PaddleX API,或采用飞桨核心框架进行服务器上的部署。如果您希望使用公有算力,强烈建议您尝试飞桨产品系列中的 [EasyDL](https://ai.baidu.com/easydl/) 或 [AI Studio](https://aistudio.baidu.com/aistudio/index)进行开发。
3. **PaddleX支持EasyData标注的数据吗?**
支持,PaddleX可顺畅读取EasyData标注的数据。但当前版本的PaddleX GUI暂时无法支持直接导入EasyData数据格式,您可以参照文档,将[数据集进行转换](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html)再导入PaddleX GUI进行后续开发。
同时,我们也在紧密开发PaddleX GUI可直接导入EasyData数据格式的功能。
4. **为什么模型裁剪分析耗时这么长?**
模型裁剪分析过程是对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪。此过程需要重复多次直至FLOPS满足要求,最后再进行精调训练获得最终裁剪后的模型,因此耗时较长。有关模型裁剪的原理,可参见文档[剪裁原理介绍](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)
5. **如何调用后端代码?**
PaddleX 团队为您整理了相关的API接口文档,方便您学习和使用。具体请参见[PaddleX API说明文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html)
**如果您有更多问题或建议,欢迎以issue的形式,或加入PaddleX官方QQ群(1045148026)直接反馈您的问题和需求**
![](images/QR.jpg)
...@@ -21,7 +21,7 @@ PaddleX GUI是基于PaddleX开发实现的可视化模型训练套件,可以 ...@@ -21,7 +21,7 @@ PaddleX GUI是基于PaddleX开发实现的可视化模型训练套件,可以
how_to_use.md how_to_use.md
xx.md xx.md
* PaddleX版本: v0.1.7 * PaddleX GUI版本: v1.0
* 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex * 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex
* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop * 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop
* 官方QQ用户群: 1045148026 * 官方QQ用户群: 1045148026
......
...@@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet( ...@@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet(
本文档中使用百度基于蒸馏方法得到的MobileNetV3预训练模型,模型结构与MobileNetV3一致,但精度更高。PaddleX内置了20多种分类模型,查阅[PaddleX模型库](appendix/model_zoo.md)了解更多分类模型。 本文档中使用百度基于蒸馏方法得到的MobileNetV3预训练模型,模型结构与MobileNetV3一致,但精度更高。PaddleX内置了20多种分类模型,查阅[PaddleX模型库](appendix/model_zoo.md)了解更多分类模型。
``` ```
num_classes = len(train_dataset.labels) num_classes = len(train_dataset.labels)
model.pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes) model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes)
``` ```
### 3.4 定义训练参数 ### 3.4 定义训练参数
...@@ -86,7 +86,7 @@ python train.py ...@@ -86,7 +86,7 @@ python train.py
## 5. 训练过程中查看训练指标 ## 5. 训练过程中查看训练指标
模型在训练过程中,所有的迭代信息将以标准输出流的形式,输出到命令执行的终端上,用户也可通过visualdl以可视化的方式查看训练指标的变化,通过如下方式启动visualdl后,在浏览器打开https://0.0.0.0:8001 (或 https://localhost:8001)即可。 模型在训练过程中,所有的迭代信息将以标准输出流的形式,输出到命令执行的终端上,用户也可通过visualdl以可视化的方式查看训练指标的变化,通过如下方式启动visualdl后,在浏览器打开https://0.0.0.0:8001 (或 https://localhost:8001)即可。
``` ```
visualdl --logdir output/mobilenetv2/vdl_log --port 8000 visualdl --logdir output/mobilenetv2/vdl_log --port 8001
``` ```
![](./images/vdl1.jpg) ![](./images/vdl1.jpg)
......
模型压缩
============================
.. toctree::
:maxdepth: 2
prune.md
quant.md
# 模型裁剪
## 原理介绍
模型裁剪用于减小模型的计算量和体积,可以加快模型部署后的预测速度,是一种减小模型大小和降低模型计算复杂度的常用方式,通过裁剪卷积层中Kernel输出通道的大小及其关联层参数大小来实现,其关联裁剪的原理可参见[PaddleSlim相关文档](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16)。**一般而言,在同等模型精度前提下,数据复杂度越低,模型可以被裁剪的比例就越高**。
## 裁剪方法
PaddleX提供了两种方式:
**1.用户自行计算裁剪配置(推荐),整体流程包含三个步骤,**
> **第一步**: 使用数据集训练原始模型
> **第二步**:利用第一步训练好的模型,在验证数据集上计算模型中各个参数的敏感度,并将敏感度信息存储至本地文件
> **第三步**:使用数据集训练裁剪模型(与第一步差异在于需要在`train`接口中,将第二步计算得到的敏感信息文件传给接口的`sensitivities_file`参数)
> 在如上三个步骤中,**相当于模型共需要训练两遍**,分别对应第一步和第三步,但其中第三步训练的是裁剪后的模型,因此训练速度较第一步会更快。
> 第二步会遍历模型中的部分裁剪参数,分别计算各个参数裁剪后对于模型在验证集上效果的影响,**因此会反复在验证集上评估多次**。
**2.使用PaddleX内置的裁剪方案**
> PaddleX内置的模型裁剪方案是**基于标准数据集**上计算得到的参数敏感度信息,由于不同数据集特征分布会有较大差异,所以该方案相较于第1种方案训练得到的模型**精度一般而言会更低**(**且用户自定义数据集与标准数据集特征分布差异越大,导致训练的模型精度会越低**),仅在用户想节省时间的前提下可以参考使用,使用方式只需一步,
> **一步**: 使用数据集训练裁剪模型,在训练调用`train`接口时,将接口中的`sensitivities_file`参数设置为'DEFAULT'字符串
> 注:各模型内置的裁剪方案分别依据的数据集为: 图像分类——ImageNet数据集、目标检测——PascalVOC数据集、语义分割——CityScape数据集
## 裁剪实验
基于上述两种方案,我们在PaddleX上使用样例数据进行了实验,在Tesla P40上实验指标如下所示,
### 图像分类
实验背景:使用MobileNetV2模型,数据集为蔬菜分类示例数据,见[使用教程-模型压缩-图像分类](../tutorials/compress/classification.md)
| 模型 | 裁剪情况 | 模型大小 | Top1准确率(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- |:----------|
|MobileNetV2 | 无裁剪(原模型)| 13.0M | 97.50|6.47ms |47.44ms |
|MobileNetV2 | 方案一(eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms |
|MobileNetV2 | 方案二(eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms |
### 目标检测
实验背景:使用YOLOv3-MobileNetV1模型,数据集为昆虫检测示例数据,见[使用教程-模型压缩-目标检测](../tutorials/compress/detection.md)
| 模型 | 裁剪情况 | 模型大小 | MAP(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|YOLOv3-MobileNetV1 | 无裁剪(原模型)| 139M | 67.57| 14.88ms |976.42ms |
|YOLOv3-MobileNetV1 | 方案一(eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms |
|YOLOv3-MobileNetV1 | 方案二(eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms |
### 语义分割
实验背景:使用UNet模型,数据集为视盘分割示例数据, 见[使用教程-模型压缩-语义分割](../tutorials/compress/segmentation.md)
| 模型 | 裁剪情况 | 模型大小 | mIOU(%) |GPU预测速度 | CPU预测速度 |
| :-----| :--------| :-------- | :---------- |:---------- | :---------|
|UNet | 无裁剪(原模型)| 77M | 91.22 |33.28ms |9523.55ms |
|UNet | 方案一(eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms |
|UNet | 方案二(eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |
# 模型量化
## 原理介绍
为了满足低内存带宽、低功耗、低计算资源占用以及低模型存储等需求,定点量化被提出。为此我们提供了训练后量化,该量化使用KL散度确定量化比例因子,将FP32模型转成INT8模型,且不需要重新训练,可以快速得到量化模型。
## 使用PaddleX量化模型
PaddleX提供了`export_quant_model`接口,让用户以接口的形式完成模型以post_quantization方式量化并导出。点击查看[量化接口使用文档](../apis/slim.md)
## 量化性能对比
模型量化后的性能对比指标请查阅[PaddleSlim模型库](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
目标检测
============================
PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。
.. toctree::
:maxdepth: 1
negatives_training.md
# 通过负样本学习降低误检率
## 应用场景
在背景和目标相似的场景下,模型容易把背景误检成目标。为了降低误检率,可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的图片加入训练。
## 效果对比
* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**
* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**
表1 违禁品验证集上**框级别精度**对比
||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)|
|:---|:---|:---|
|基准模型 | 45.8% | 83% |
|通过负样本学习后的模型 | 49.4% | 83.1% |
表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
|:---|:--------------------|:------------------------|
|基准模型 | 98.97% | 55.27% |
|通过负样本学习后的模型 | 97.75% | 5.59% |
【名词解释】
* 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。
* 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。
## 使用方法
在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下:
```
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 定义训练和验证时的transforms
train_transforms = transforms.ComposedRCNNTransforms(
mode='train', min_max_size=[600, 1000])
eval_transforms = transforms.ComposedRCNNTransforms(
mode='eval', min_max_size=[600, 1000])
# 定义训练所用的数据集
train_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/train.json',
transforms=train_transforms,
shuffle=True,
num_workers=2)
# 训练集中加入无目标背景图片
train_dataset.add_negative_samples(
'jinnan2_round1_train_20190305/normal_train_back/')
# 定义验证所用的数据集
eval_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/val.json',
transforms=eval_transforms,
num_workers=2)
# 初始化模型,并进行训练
model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
model.train(
num_epochs=17,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
train_batch_size=8,
learning_rate=0.01,
lr_decay_epochs=[13, 16],
save_dir='./output')
```
## 实验细则
(1) 数据集
我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。
* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。
* 训练集有883张违禁品图片,验证集有98张违禁品图片。
* 无违禁品的X光图片有2540张。
(2) 基准模型
使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。
(3) 通过负样本学习后的模型
把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。
通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集的误检率仅有5.59%,与基准模型相比,**误检率降低了49.68%**。
此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。
PaddleX调优策略介绍
============================
.. toctree::
:maxdepth: 2
detection/index.rst
# 数据准备 # 数据准备
## 数据标注 该部分内容已迁移至[附录](../appendix/datasets.md)
## 主流标注软件支持
## EasyData数据标注支持
# 移动端部署 # 移动端部署
PaddleX的移动端部署由PaddleLite实现,部署的流程如下,首先将训练好的模型导出为inference model,然后使用PaddleLite的python接口对模型进行优化,最后使用PaddleLite的预测库进行部署,
PaddleLite的详细介绍和使用可参考:[PaddleLite文档](https://paddle-lite.readthedocs.io/zh/latest/)
> PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference
以下介绍如何将PaddleX导出为inference model,然后使用PaddleLite的OPT模块对模型进行优化:
step 1: 安装PaddleLite step 1: 安装PaddleLite
``` ```
...@@ -9,14 +16,21 @@ pip install paddlelite ...@@ -9,14 +16,21 @@ pip install paddlelite
step 2: 将PaddleX模型导出为inference模型 step 2: 将PaddleX模型导出为inference模型
参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。 参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](./upgrade_version.md)对模型版本进行升级。**
step 3: 将inference模型转换成PaddleLite模型 step 3: 将inference模型转换成PaddleLite模型
``` ```
python /path/to/PaddleX/deploy/lite/export_lite.py --model_path /path/to/inference_model --save_dir /path/to/onnx_model python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run
``` ```
`--model_path`用于指定inference模型的路径,`--save_dir`用于指定Lite模型的保存路径。 | 参数 | 说明 |
| ---- | ---- |
| model_dir | 预测模型所在路径,包含"__model__", "__params__"文件 |
| save_file | 模型输出的名称,默认为"paddlex.nb" |
| place | 运行的平台,可选:arm|opencl|x86|npu|xpu|rknpu|apu |
step 4: 预测 step 4: 预测
......
...@@ -96,6 +96,17 @@ cmake .. \ ...@@ -96,6 +96,17 @@ cmake .. \
make make
``` ```
**注意:** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载:
- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2)
- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)
- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
opencv3gcc4.8.tar.bz2文件下载后解压,然后在script/build.sh中指定`OPENCV_DIR`为解压后的路径。
paddlex-encryption.zip文件下载后解压,然后在script/build.sh中指定`ENCRYPTION_DIR`为解压后的路径。
yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
修改脚本设置好主要参数后,执行`build`脚本: 修改脚本设置好主要参数后,执行`build`脚本:
```shell ```shell
...@@ -104,7 +115,9 @@ make ...@@ -104,7 +115,9 @@ make
### Step5: 预测及可视化 ### Step5: 预测及可视化
参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 **在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
编译成功后,预测demo的可执行程序分别为`build/demo/detector`、`build/demo/classifier`、`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: 编译成功后,预测demo的可执行程序分别为`build/demo/detector`、`build/demo/classifier`、`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
...@@ -116,7 +129,7 @@ make ...@@ -116,7 +129,7 @@ make
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | | use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
| gpu_id | GPU 设备ID, 默认值为0 | | gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",classifier无该参数 | | save_dir | 保存可视化结果的路径, 默认值为"output",**classifier无该参数** |
## 样例 ## 样例
......
...@@ -86,7 +86,14 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens ...@@ -86,7 +86,14 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
| OPENCV_DIR | OpenCV的安装路径, | | OPENCV_DIR | OpenCV的安装路径, |
| PADDLE_DIR | Paddle c++预测库的路径 | | PADDLE_DIR | Paddle c++预测库的路径 |
**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的``去掉勾 2. 如果使用的是`openblas`版本,请把`WITH_MKL`的``去掉勾 **注意:**
1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾
2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾
3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
![step4](../../images/vs2019_step5.png) ![step4](../../images/vs2019_step5.png)
...@@ -99,7 +106,10 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens ...@@ -99,7 +106,10 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
### Step5: 预测及可视化 ### Step5: 预测及可视化
参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。
**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
...@@ -122,7 +132,7 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release ...@@ -122,7 +132,7 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
## 样例 ## 样例
可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。 可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测。
`样例一`: `样例一`:
......
...@@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer ...@@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer
``` ```
## 预测部署 ## 预测部署
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。**
> 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz) > 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz)
``` ```
......
...@@ -61,7 +61,7 @@ paddlex-encryption ...@@ -61,7 +61,7 @@ paddlex-encryption
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model ./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
``` ```
`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` `-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
![](../images/encrypt.png) ![](../images/encrypt.png)
......
# 模型版本升级
由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,用户需要按照以下步骤对模型版本进行转换,转换后的模型可以在多端上完成部署。
## 检查模型版本
存放模型的文件夹存有一个`model.yml`文件,该文件的最后一行`version`值表示模型的版本号。若版本号小于1.0.0,则需要进行版本转换;若版本号大于或等于1.0.0,则不需要进行版本转换。
## 版本转换
```
paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=/path/to/high_version_model
```
`--model_dir`为版本号小于1.0.0的模型路径,可以是PaddleX训练过程保存的模型,也可以是导出为inference格式的模型。`--save_dir`为转换后的高版本模型的保存路径,转换后的模型后续可用于多端部署。
\ No newline at end of file
# 使用教程——训练模型
本目录下整理了使用PaddleX训练模型的示例代码,代码中均提供了示例数据的自动下载,并均使用单张GPU卡进行训练。
|代码 | 模型任务 | 数据 |
|------|--------|---------|
|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 |
|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 |
|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 |
|detection/mask_rcnn_r50_fpn.py | 实例分割MaskRCNN | 垃圾分拣 |
|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 |
|segmentation/unet.py | 语义分割UNet | 视盘分割 |
|segmentation/hrnet.py | 语义分割HRNet | 视盘分割 |
|segmentation/fast_scnn.py | 语义分割FastSCNN | 视盘分割 |
## 开始训练
在安装PaddleX后,使用如下命令开始训练
```
python classification/mobilenetv2.py
```
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from paddlex.cls import transforms
import paddlex as pdx

# Fetch the vegetable classification dataset and unpack it into the
# current directory.
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(
    mode='train',
    crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(
    mode='eval',
    crop_size=[224, 224])

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
dataset_common = dict(
    data_dir='vegetables_cls',
    label_list='vegetables_cls/labels.txt')
train_dataset = pdx.datasets.ImageNet(
    file_list='vegetables_cls/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.ImageNet(
    file_list='vegetables_cls/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/mobilenetv2/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
train_options = dict(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    lr_decay_epochs=[4, 6, 8],
    learning_rate=0.025,
    save_dir='output/mobilenetv2',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddle/paddlex
# imports, keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx

# Fetch the vegetable classification dataset and unpack it into the
# current directory.
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(
    mode='train',
    crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(
    mode='eval',
    crop_size=[224, 224])

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
dataset_common = dict(
    data_dir='vegetables_cls',
    label_list='vegetables_cls/labels.txt')
train_dataset = pdx.datasets.ImageNet(
    file_list='vegetables_cls/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.ImageNet(
    file_list='vegetables_cls/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# PaddleX accepts a user-built optimizer: here, Momentum with a cosine
# learning-rate schedule and L2 weight decay.
# The divisor 32 is the same value passed as train_batch_size below.
step_each_epoch = train_dataset.num_samples // 32
learning_rate = fluid.layers.cosine_decay(
    learning_rate=0.025,
    step_each_epoch=step_each_epoch,
    epochs=10)
weight_decay = fluid.regularizer.L2Decay(4e-5)
optimizer = fluid.optimizer.Momentum(
    learning_rate=learning_rate,
    momentum=0.9,
    regularization=weight_decay)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/resnet50/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
train_options = dict(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    optimizer=optimizer,
    save_dir='output/resnet50',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from paddlex.det import transforms
import paddlex as pdx

# Fetch the insect detection dataset and unpack it into the current
# directory.
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(
    mode='train',
    min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(
    mode='eval',
    min_max_size=[800, 1333])

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
dataset_common = dict(
    data_dir='insect_det',
    label_list='insect_det/labels.txt')
train_dataset = pdx.datasets.VOCDetection(
    file_list='insect_det/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.VOCDetection(
    file_list='insect_det/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# num_classes has to count the background class too, i.e.
# number of target classes + 1.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.FasterRCNN(num_classes=num_classes)
train_options = dict(
    num_epochs=12,
    train_dataset=train_dataset,
    train_batch_size=2,
    eval_dataset=eval_dataset,
    learning_rate=0.0025,
    lr_decay_epochs=[8, 11],
    save_dir='output/faster_rcnn_r50_fpn',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from paddlex.det import transforms
import paddlex as pdx

# Fetch the xiaoduxiong sorting dataset and unpack it into the current
# directory.
xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(
    mode='train',
    min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(
    mode='eval',
    min_max_size=[800, 1333])

# Training and evaluation datasets; both read images from the same
# directory and differ in the annotation file and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
dataset_common = dict(data_dir='xiaoduxiong_ins_det/JPEGImages')
train_dataset = pdx.datasets.CocoDetection(
    ann_file='xiaoduxiong_ins_det/train.json',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.CocoDetection(
    ann_file='xiaoduxiong_ins_det/val.json',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# num_classes has to count the background class too, i.e.
# number of target classes + 1.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.MaskRCNN(num_classes=num_classes)
train_options = dict(
    num_epochs=12,
    train_dataset=train_dataset,
    train_batch_size=1,
    eval_dataset=eval_dataset,
    learning_rate=0.00125,
    warmup_steps=10,
    lr_decay_epochs=[8, 11],
    save_dir='output/mask_rcnn_r50_fpn',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from paddlex.det import transforms
import paddlex as pdx

# Fetch the insect detection dataset and unpack it into the current
# directory.
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
train_transforms = transforms.ComposedYOLOv3Transforms(
    mode='train', shape=[608, 608])
# The evaluation mode is spelled 'eval', consistent with every other
# Composed*Transforms call in the sibling training scripts.
eval_transforms = transforms.ComposedYOLOv3Transforms(
    mode='eval', shape=[608, 608])

# Training and evaluation datasets.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/train_list.txt',
    label_list='insect_det/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',
    file_list='insect_det/val_list.txt',
    label_list='insect_det/labels.txt',
    transforms=eval_transforms)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/yolov3_darknet53/vdl_log --port 8001
# (log directory presumably sits under save_dir, as in the sibling
# scripts -- TODO confirm), then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
model.train(
    num_epochs=270,
    train_dataset=train_dataset,
    train_batch_size=8,
    eval_dataset=eval_dataset,
    learning_rate=0.000125,
    lr_decay_epochs=[210, 240],
    save_dir='output/yolov3_darknet53',
    use_vdl=True)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import paddlex as pdx
from paddlex.seg import transforms

# Fetch the optic disc segmentation dataset and unpack it into the
# current directory.
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(
    mode='train',
    train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# Extend the training pipeline with a random-rotation augmenter.
extra_augmenters = [transforms.RandomRotate()]
train_transforms.add_augmenters(extra_augmenters)

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
dataset_common = dict(
    data_dir='optic_disc_seg',
    label_list='optic_disc_seg/labels.txt')
train_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/deeplab/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
model = pdx.seg.DeepLabv3p(num_classes=num_classes)
train_options = dict(
    num_epochs=40,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/deeplab',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import paddlex as pdx
from paddlex.seg import transforms

# Fetch the optic disc segmentation dataset and unpack it into the
# current directory.
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(
    mode='train',
    train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
dataset_common = dict(
    data_dir='optic_disc_seg',
    label_list='optic_disc_seg/labels.txt')
train_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/fastscnn/vdl_log --port 8001
# NOTE(review): the log directory is presumed to sit under save_dir, as in
# the sibling scripts; the earlier comment named output/unet/vdl_log --
# confirm.
# Then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
# NOTE(review): the link anchors at #hrnet although the model built here
# is FastSCNN -- confirm the intended anchor.
num_classes = len(train_dataset.labels)
model = pdx.seg.FastSCNN(num_classes=num_classes)
train_options = dict(
    num_epochs=20,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/fastscnn',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import paddlex as pdx
from paddlex.seg import transforms

# Fetch the optic disc segmentation dataset and unpack it into the
# current directory.
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(
    mode='train',
    train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
dataset_common = dict(
    data_dir='optic_disc_seg',
    label_list='optic_disc_seg/labels.txt')
train_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/hrnet/vdl_log --port 8001
# NOTE(review): the log directory is presumed to sit under save_dir, as in
# the sibling scripts; the earlier comment named output/unet/vdl_log --
# confirm.
# Then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
train_options = dict(
    num_epochs=20,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/hrnet',
    use_vdl=True)
model.train(**train_options)
import os

# Expose only GPU card 0 to this process (set ahead of the paddlex imports,
# keeping the original statement order).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import paddlex as pdx
from paddlex.seg import transforms

# Fetch the optic disc segmentation dataset and unpack it into the
# current directory.
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')

# Preprocessing pipelines for training and for evaluation.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(
    mode='train',
    train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# Training and evaluation datasets; both read from the same directory and
# label list, differing only in the file list and transforms.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
dataset_common = dict(
    data_dir='optic_disc_seg',
    label_list='optic_disc_seg/labels.txt')
train_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/train_list.txt',
    transforms=train_transforms,
    shuffle=True,
    **dataset_common)
eval_dataset = pdx.datasets.SegDataset(
    file_list='optic_disc_seg/val_list.txt',
    transforms=eval_transforms,
    **dataset_common)

# Build the model and train it.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir output/unet/vdl_log --port 8001
# then open https://0.0.0.0:8001 in a browser.
# 0.0.0.0 is for access from the local machine; for a remote service use
# that machine's IP instead.
# API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
num_classes = len(train_dataset.labels)
model = pdx.seg.UNet(num_classes=num_classes)
train_options = dict(
    num_epochs=20,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    save_dir='output/unet',
    use_vdl=True)
model.train(**train_options)
...@@ -53,4 +53,4 @@ log_level = 2 ...@@ -53,4 +53,4 @@ log_level = 2
from . import interpret from . import interpret
__version__ = '1.0.2.github' __version__ = '1.0.6'
...@@ -36,5 +36,7 @@ DenseNet121 = cv.models.DenseNet121 ...@@ -36,5 +36,7 @@ DenseNet121 = cv.models.DenseNet121
DenseNet161 = cv.models.DenseNet161 DenseNet161 = cv.models.DenseNet161
DenseNet201 = cv.models.DenseNet201 DenseNet201 = cv.models.DenseNet201
ShuffleNetV2 = cv.models.ShuffleNetV2 ShuffleNetV2 = cv.models.ShuffleNetV2
HRNet_W18 = cv.models.HRNet_W18
AlexNet = cv.models.AlexNet
transforms = cv.transforms.cls_transforms transforms = cv.transforms.cls_transforms
...@@ -128,7 +128,6 @@ class CocoDetection(VOCDetection): ...@@ -128,7 +128,6 @@ class CocoDetection(VOCDetection):
coco_rec = (im_info, label_info) coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec]) self.file_list.append([im_fname, coco_rec])
if not len(self.file_list) > 0: if not len(self.file_list) > 0:
raise Exception('not found any coco record in %s' % (ann_file)) raise Exception('not found any coco record in %s' % (ann_file))
logging.info("{} samples in file {}".format( logging.info("{} samples in file {}".format(
......
...@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data): ...@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data):
padding_batch = [] padding_batch = []
for data in batch_data: for data in batch_data:
im_c, im_h, im_w = data[0].shape[:] im_c, im_h, im_w = data[0].shape[:]
padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), padding_im = np.zeros(
dtype=np.float32) (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = data[0] padding_im[:, :im_h, :im_w] = data[0]
padding_batch.append((padding_im, ) + data[1:]) padding_batch.append((padding_im, ) + data[1:])
return padding_batch return padding_batch
...@@ -226,8 +226,8 @@ class Dataset: ...@@ -226,8 +226,8 @@ class Dataset:
if num_workers == 'auto': if num_workers == 'auto':
import multiprocessing as mp import multiprocessing as mp
num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
if platform.platform().startswith( if platform.platform().startswith("Darwin") or platform.platform(
"Darwin") or platform.platform().startswith("Windows"): ).startswith("Windows"):
parallel_method = 'thread' parallel_method = 'thread'
if transforms is None: if transforms is None:
raise Exception("transform should be defined.") raise Exception("transform should be defined.")
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import copy import copy
import os
import os.path as osp import os.path as osp
import random import random
import re import re
...@@ -122,9 +123,13 @@ class VOCDetection(Dataset): ...@@ -122,9 +123,13 @@ class VOCDetection(Dataset):
is_crowd = np.zeros((len(objs), 1), dtype=np.int32) is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs): for i, obj in enumerate(objs):
<<<<<<< HEAD
pattern = re.compile('<name>', re.IGNORECASE) pattern = re.compile('<name>', re.IGNORECASE)
name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
cname = obj.find(name_tag).text.strip() cname = obj.find(name_tag).text.strip()
=======
cname = obj.find('name').text.strip()
>>>>>>> e9b8c938bf48a74eba95458257b512cfcbdbcca3
gt_class[i][0] = cname2cid[cname] gt_class[i][0] = cname2cid[cname]
pattern = re.compile('<difficult>', re.IGNORECASE) pattern = re.compile('<difficult>', re.IGNORECASE)
diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
...@@ -197,6 +202,44 @@ class VOCDetection(Dataset): ...@@ -197,6 +202,44 @@ class VOCDetection(Dataset):
self.coco_gt.dataset = annotations self.coco_gt.dataset = annotations
self.coco_gt.createIndex() self.coco_gt.createIndex()
def add_negative_samples(self, image_dir):
    """Append background-only (negative) images to the dataset.

    Every picture file found directly under ``image_dir`` is appended to
    ``self.file_list`` together with one placeholder ground-truth record
    (class id 0, a box of size 1e-05), and ``self.num_samples`` is
    refreshed afterwards.

    Args:
        image_dir (str): Directory that holds the background images.

    Raises:
        Exception: If ``image_dir`` does not exist, or if a picture file
            inside it cannot be decoded by OpenCV.
    """
    import cv2
    if not osp.exists(image_dir):
        raise Exception("{} background images directory does not exist.".
                        format(image_dir))
    image_list = os.listdir(image_dir)
    # New samples are numbered after the largest image id already known.
    max_img_id = max(self.coco_gt.getImgIds())
    for image in image_list:
        if not is_pic(image):
            continue
        im_fname = osp.join(image_dir, image)
        img_data = cv2.imread(im_fname)
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; fail with a message that names the file
        # instead of an opaque AttributeError on ``.shape``.
        if img_data is None:
            raise Exception("Failed to read background image {}.".format(
                im_fname))
        # Only height and width are needed; slicing avoids depending on
        # the number of dimensions of the decoded array.
        im_h, im_w = img_data.shape[:2]
        # False ground truth
        gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32)
        gt_class = np.array([[0]], dtype=np.int32)
        gt_score = np.ones((1, 1), dtype=np.float32)
        is_crowd = np.array([[0]], dtype=np.int32)
        difficult = np.zeros((1, 1), dtype=np.int32)
        gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]]
        max_img_id += 1
        im_info = {
            'im_id': np.array([max_img_id]).astype('int32'),
            'image_shape': np.array([im_h, im_w]).astype('int32'),
        }
        label_info = {
            'is_crowd': is_crowd,
            'gt_class': gt_class,
            'gt_bbox': gt_bbox,
            'gt_score': gt_score,
            'difficult': difficult,
            'gt_poly': gt_poly
        }
        coco_rec = (im_info, label_info)
        self.file_list.append([im_fname, coco_rec])
    self.num_samples = len(self.file_list)
def iterator(self): def iterator(self):
self._epoch += 1 self._epoch += 1
self._pos = 0 self._pos = 0
......
...@@ -34,11 +34,15 @@ from .classifier import DenseNet121 ...@@ -34,11 +34,15 @@ from .classifier import DenseNet121
from .classifier import DenseNet161 from .classifier import DenseNet161
from .classifier import DenseNet201 from .classifier import DenseNet201
from .classifier import ShuffleNetV2 from .classifier import ShuffleNetV2
from .classifier import HRNet_W18
from .classifier import AlexNet
from .base import BaseAPI from .base import BaseAPI
from .yolo_v3 import YOLOv3 from .yolo_v3 import YOLOv3
from .faster_rcnn import FasterRCNN from .faster_rcnn import FasterRCNN
from .mask_rcnn import MaskRCNN from .mask_rcnn import MaskRCNN
from .unet import UNet from .unet import UNet
from .deeplabv3p import DeepLabv3p from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .load_model import load_model from .load_model import load_model
from .slim import prune from .slim import prune
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -79,9 +79,9 @@ class BaseAPI: ...@@ -79,9 +79,9 @@ class BaseAPI:
return int(batch_size // len(self.places)) return int(batch_size // len(self.places))
else: else:
raise Exception("Please support correct batch_size, \ raise Exception("Please support correct batch_size, \
which can be divided by available cards({}) in {}". which can be divided by available cards({}) in {}"
format(paddlex.env_info['num'], .format(paddlex.env_info['num'], paddlex.env_info[
paddlex.env_info['place'])) 'place']))
def build_program(self): def build_program(self):
# 构建训练网络 # 构建训练网络
...@@ -194,12 +194,37 @@ class BaseAPI: ...@@ -194,12 +194,37 @@ class BaseAPI:
if os.path.exists(pretrain_dir): if os.path.exists(pretrain_dir):
os.remove(pretrain_dir) os.remove(pretrain_dir)
os.makedirs(pretrain_dir) os.makedirs(pretrain_dir)
if pretrain_weights is not None and not os.path.exists(
pretrain_weights):
if self.model_type == 'classifier':
if pretrain_weights not in ['IMAGENET']:
logging.warning(
"Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'detector':
if pretrain_weights not in ['IMAGENET', 'COCO']:
logging.warning(
"Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'segmenter':
if pretrain_weights not in [
'IMAGENET', 'COCO', 'CITYSCAPES'
]:
logging.warning(
"Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
if hasattr(self, 'backbone'): if hasattr(self, 'backbone'):
backbone = self.backbone backbone = self.backbone
else: else:
backbone = self.__class__.__name__ backbone = self.__class__.__name__
if backbone == "HRNet":
backbone = backbone + "_W{}".format(self.width)
class_name = self.__class__.__name__
pretrain_weights = get_pretrain_weights( pretrain_weights = get_pretrain_weights(
pretrain_weights, self.model_type, backbone, pretrain_dir) pretrain_weights, class_name, backbone, pretrain_dir)
if startup_prog is None: if startup_prog is None:
startup_prog = fluid.default_startup_program() startup_prog = fluid.default_startup_program()
self.exe.run(startup_prog) self.exe.run(startup_prog)
...@@ -210,8 +235,8 @@ class BaseAPI: ...@@ -210,8 +235,8 @@ class BaseAPI:
paddlex.utils.utils.load_pretrain_weights( paddlex.utils.utils.load_pretrain_weights(
self.exe, self.train_prog, resume_checkpoint, resume=True) self.exe, self.train_prog, resume_checkpoint, resume=True)
if not osp.exists(osp.join(resume_checkpoint, "model.yml")): if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
raise Exception( raise Exception("There's not model.yml in {}".format(
"There's not model.yml in {}".format(resume_checkpoint)) resume_checkpoint))
with open(osp.join(resume_checkpoint, "model.yml")) as f: with open(osp.join(resume_checkpoint, "model.yml")) as f:
info = yaml.load(f.read(), Loader=yaml.Loader) info = yaml.load(f.read(), Loader=yaml.Loader)
self.completed_epochs = info['completed_epochs'] self.completed_epochs = info['completed_epochs']
...@@ -260,6 +285,7 @@ class BaseAPI: ...@@ -260,6 +285,7 @@ class BaseAPI:
info['_Attributes']['num_classes'] = self.num_classes info['_Attributes']['num_classes'] = self.num_classes
info['_Attributes']['labels'] = self.labels info['_Attributes']['labels'] = self.labels
info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape
try: try:
primary_metric_key = list(self.eval_metrics.keys())[0] primary_metric_key = list(self.eval_metrics.keys())[0]
primary_metric_value = float(self.eval_metrics[primary_metric_key]) primary_metric_value = float(self.eval_metrics[primary_metric_key])
...@@ -269,13 +295,13 @@ class BaseAPI: ...@@ -269,13 +295,13 @@ class BaseAPI:
except: except:
pass pass
if hasattr(self.test_transforms, 'to_rgb'):
if self.test_transforms.to_rgb:
info['TransformsMode'] = 'RGB'
else:
info['TransformsMode'] = 'BGR'
if hasattr(self, 'test_transforms'): if hasattr(self, 'test_transforms'):
if hasattr(self.test_transforms, 'to_rgb'):
if self.test_transforms.to_rgb:
info['TransformsMode'] = 'RGB'
else:
info['TransformsMode'] = 'BGR'
if self.test_transforms is not None: if self.test_transforms is not None:
info['Transforms'] = list() info['Transforms'] = list()
for op in self.test_transforms.transforms: for op in self.test_transforms.transforms:
...@@ -362,8 +388,8 @@ class BaseAPI: ...@@ -362,8 +388,8 @@ class BaseAPI:
# 模型保存成功的标志 # 模型保存成功的标志
open(osp.join(save_dir, '.success'), 'w').close() open(osp.join(save_dir, '.success'), 'w').close()
logging.info( logging.info("Model for inference deploy saved in {}.".format(
"Model for inference deploy saved in {}.".format(save_dir)) save_dir))
def train_loop(self, def train_loop(self,
num_epochs, num_epochs,
...@@ -377,7 +403,8 @@ class BaseAPI: ...@@ -377,7 +403,8 @@ class BaseAPI:
early_stop=False, early_stop=False,
early_stop_patience=5): early_stop_patience=5):
if train_dataset.num_samples < train_batch_size: if train_dataset.num_samples < train_batch_size:
raise Exception('The amount of training datset must be larger than batch size.') raise Exception(
'The amount of training datset must be larger than batch size.')
if not osp.isdir(save_dir): if not osp.isdir(save_dir):
if osp.exists(save_dir): if osp.exists(save_dir):
os.remove(save_dir) os.remove(save_dir)
...@@ -415,8 +442,8 @@ class BaseAPI: ...@@ -415,8 +442,8 @@ class BaseAPI:
build_strategy=build_strategy, build_strategy=build_strategy,
exec_strategy=exec_strategy) exec_strategy=exec_strategy)
total_num_steps = math.floor( total_num_steps = math.floor(train_dataset.num_samples /
train_dataset.num_samples / train_batch_size) train_batch_size)
num_steps = 0 num_steps = 0
time_stat = list() time_stat = list()
time_train_one_epoch = None time_train_one_epoch = None
...@@ -430,8 +457,8 @@ class BaseAPI: ...@@ -430,8 +457,8 @@ class BaseAPI:
if self.model_type == 'detector': if self.model_type == 'detector':
eval_batch_size = self._get_single_card_bs(train_batch_size) eval_batch_size = self._get_single_card_bs(train_batch_size)
if eval_dataset is not None: if eval_dataset is not None:
total_num_steps_eval = math.ceil( total_num_steps_eval = math.ceil(eval_dataset.num_samples /
eval_dataset.num_samples / eval_batch_size) eval_batch_size)
if use_vdl: if use_vdl:
# VisualDL component # VisualDL component
...@@ -473,7 +500,9 @@ class BaseAPI: ...@@ -473,7 +500,9 @@ class BaseAPI:
if use_vdl: if use_vdl:
for k, v in step_metrics.items(): for k, v in step_metrics.items():
log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps) log_writer.add_scalar(
'Metrics/Training(Step): {}'.format(k), v,
num_steps)
# 估算剩余时间 # 估算剩余时间
avg_step_time = np.mean(time_stat) avg_step_time = np.mean(time_stat)
...@@ -481,11 +510,12 @@ class BaseAPI: ...@@ -481,11 +510,12 @@ class BaseAPI:
eta = (num_epochs - i - 1) * time_train_one_epoch + ( eta = (num_epochs - i - 1) * time_train_one_epoch + (
total_num_steps - step - 1) * avg_step_time total_num_steps - step - 1) * avg_step_time
else: else:
eta = ((num_epochs - i) * total_num_steps - step - eta = ((num_epochs - i) * total_num_steps - step - 1
1) * avg_step_time ) * avg_step_time
if time_eval_one_epoch is not None: if time_eval_one_epoch is not None:
eval_eta = (total_eval_times - i // eval_eta = (
save_interval_epochs) * time_eval_one_epoch total_eval_times - i // save_interval_epochs
) * time_eval_one_epoch
else: else:
eval_eta = ( eval_eta = (
total_eval_times - i // save_interval_epochs total_eval_times - i // save_interval_epochs
...@@ -495,16 +525,18 @@ class BaseAPI: ...@@ -495,16 +525,18 @@ class BaseAPI:
logging.info( logging.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}" "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
.format(i + 1, num_epochs, step + 1, total_num_steps, .format(i + 1, num_epochs, step + 1, total_num_steps,
dict2str(step_metrics), round( dict2str(step_metrics),
avg_step_time, 2), eta_str)) round(avg_step_time, 2), eta_str))
train_metrics = OrderedDict( train_metrics = OrderedDict(
zip(list(self.train_outputs.keys()), np.mean(records, axis=0))) zip(list(self.train_outputs.keys()), np.mean(
records, axis=0)))
logging.info('[TRAIN] Epoch {} finished, {} .'.format( logging.info('[TRAIN] Epoch {} finished, {} .'.format(
i + 1, dict2str(train_metrics))) i + 1, dict2str(train_metrics)))
time_train_one_epoch = time.time() - epoch_start_time time_train_one_epoch = time.time() - epoch_start_time
epoch_start_time = time.time() epoch_start_time = time.time()
# 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存 # 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存
self.completed_epochs += 1
eval_epoch_start_time = time.time() eval_epoch_start_time = time.time()
if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1: if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1)) current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
...@@ -518,7 +550,6 @@ class BaseAPI: ...@@ -518,7 +550,6 @@ class BaseAPI:
return_details=True) return_details=True)
logging.info('[EVAL] Finished, Epoch={}, {} .'.format( logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
i + 1, dict2str(self.eval_metrics))) i + 1, dict2str(self.eval_metrics)))
self.completed_epochs += 1
# 保存最优模型 # 保存最优模型
best_accuracy_key = list(self.eval_metrics.keys())[0] best_accuracy_key = list(self.eval_metrics.keys())[0]
current_accuracy = self.eval_metrics[best_accuracy_key] current_accuracy = self.eval_metrics[best_accuracy_key]
...@@ -534,7 +565,8 @@ class BaseAPI: ...@@ -534,7 +565,8 @@ class BaseAPI:
if isinstance(v, np.ndarray): if isinstance(v, np.ndarray):
if v.size > 1: if v.size > 1:
continue continue
log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1) log_writer.add_scalar(
"Metrics/Eval(Epoch): {}".format(k), v, i + 1)
self.save_model(save_dir=current_save_dir) self.save_model(save_dir=current_save_dir)
time_eval_one_epoch = time.time() - eval_epoch_start_time time_eval_one_epoch = time.time() - eval_epoch_start_time
eval_epoch_start_time = time.time() eval_epoch_start_time = time.time()
...@@ -545,4 +577,4 @@ class BaseAPI: ...@@ -545,4 +577,4 @@ class BaseAPI:
best_accuracy)) best_accuracy))
if eval_dataset is not None and early_stop: if eval_dataset is not None and early_stop:
if earlystop(current_accuracy): if earlystop(current_accuracy):
break break
\ No newline at end of file
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import numpy as np import numpy as np
...@@ -40,20 +40,21 @@ class BaseClassifier(BaseAPI): ...@@ -40,20 +40,21 @@ class BaseClassifier(BaseAPI):
self.init_params = locals() self.init_params = locals()
super(BaseClassifier, self).__init__('classifier') super(BaseClassifier, self).__init__('classifier')
if not hasattr(paddlex.cv.nets, str.lower(model_name)): if not hasattr(paddlex.cv.nets, str.lower(model_name)):
raise Exception( raise Exception("ERROR: There's no model named {}.".format(
"ERROR: There's no model named {}.".format(model_name)) model_name))
self.model_name = model_name self.model_name = model_name
self.labels = None self.labels = None
self.num_classes = num_classes self.num_classes = num_classes
self.fixed_input_shape = None self.fixed_input_shape = None
def build_net(self, mode='train'): def build_net(self, mode='train'):
if self.__class__.__name__ == "AlexNet":
assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])"
if self.fixed_input_shape is not None: if self.fixed_input_shape is not None:
input_shape = [ input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
] ]
image = fluid.data( image = fluid.data(dtype='float32', shape=input_shape, name='image')
dtype='float32', shape=input_shape, name='image')
else: else:
image = fluid.data( image = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image') dtype='float32', shape=[None, 3, None, None], name='image')
...@@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI): ...@@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI):
del outputs['loss'] del outputs['loss']
return inputs, outputs return inputs, outputs
def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma, def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr,
lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch): num_steps_each_epoch):
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [ values = [
...@@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI): ...@@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI):
] ]
lr_decay = fluid.layers.piecewise_decay( lr_decay = fluid.layers.piecewise_decay(
boundaries=boundaries, values=values) boundaries=boundaries, values=values)
if warmup_steps > 0:
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
logging.error(
"In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch,
warmup_steps // num_steps_each_epoch))
lr_decay = fluid.layers.linear_lr_warmup(
learning_rate=lr_decay,
warmup_steps=warmup_steps,
start_lr=warmup_start_lr,
end_lr=learning_rate)
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
lr_decay, lr_decay,
momentum=0.9, momentum=0.9,
...@@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI): ...@@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI):
pretrain_weights='IMAGENET', pretrain_weights='IMAGENET',
optimizer=None, optimizer=None,
learning_rate=0.025, learning_rate=0.025,
warmup_steps=0,
warmup_start_lr=0.0,
lr_decay_epochs=[30, 60, 90], lr_decay_epochs=[30, 60, 90],
lr_decay_gamma=0.1, lr_decay_gamma=0.1,
use_vdl=False, use_vdl=False,
...@@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI): ...@@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI):
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的初始学习率。默认为0.025。 learning_rate (float): 默认优化器的初始学习率。默认为0.025。
warmup_steps(int): 学习率从warmup_start_lr上升至设定的learning_rate,所需的步数,默认为0
warmup_start_lr(float): 学习率在warmup阶段时的起始值,默认为0.0
lr_decay_epochs (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 lr_decay_epochs (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。
lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。 lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。
use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。 use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。
...@@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI): ...@@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI):
num_steps_each_epoch = train_dataset.num_samples // train_batch_size num_steps_each_epoch = train_dataset.num_samples // train_batch_size
optimizer = self.default_optimizer( optimizer = self.default_optimizer(
learning_rate=learning_rate, learning_rate=learning_rate,
warmup_steps=warmup_steps,
warmup_start_lr=warmup_start_lr,
lr_decay_epochs=lr_decay_epochs, lr_decay_epochs=lr_decay_epochs,
lr_decay_gamma=lr_decay_gamma, lr_decay_gamma=lr_decay_gamma,
num_steps_each_epoch=num_steps_each_epoch) num_steps_each_epoch=num_steps_each_epoch)
...@@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI): ...@@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI):
tuple (metrics, eval_details): 当return_details为True时,增加返回dict, tuple (metrics, eval_details): 当return_details为True时,增加返回dict,
包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。 包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。
""" """
self.arrange_transforms( self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
transforms=eval_dataset.transforms, mode='eval')
data_generator = eval_dataset.generator( data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False) batch_size=batch_size, drop_last=False)
k = min(5, self.num_classes) k = min(5, self.num_classes)
...@@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI): ...@@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI):
self.test_prog).with_data_parallel( self.test_prog).with_data_parallel(
share_vars_from=self.parallel_train_prog) share_vars_from=self.parallel_train_prog)
batch_size_each_gpu = self._get_single_card_bs(batch_size) batch_size_each_gpu = self._get_single_card_bs(batch_size)
logging.info( logging.info("Start to evaluating(total_samples={}, total_steps={})...".
"Start to evaluating(total_samples={}, total_steps={})...".format( format(eval_dataset.num_samples, total_steps))
eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm( for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps): enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32') images = np.array([d[0] for d in data]).astype('float32')
...@@ -218,15 +242,14 @@ class BaseClassifier(BaseAPI): ...@@ -218,15 +242,14 @@ class BaseClassifier(BaseAPI):
num_pad_samples = batch_size - num_samples num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images]) images = np.concatenate([images, pad_images])
outputs = self.exe.run( outputs = self.exe.run(self.parallel_test_prog,
self.parallel_test_prog, feed={'image': images},
feed={'image': images}, fetch_list=list(self.test_outputs.values()))
fetch_list=list(self.test_outputs.values()))
outputs = [outputs[0][:num_samples]] outputs = [outputs[0][:num_samples]]
true_labels.extend(labels) true_labels.extend(labels)
pred_scores.extend(outputs[0].tolist()) pred_scores.extend(outputs[0].tolist())
logging.debug("[EVAL] Epoch={}, Step={}/{}".format( logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
epoch_id, step + 1, total_steps)) 1, total_steps))
pred_top1_label = np.argsort(pred_scores)[:, -1] pred_top1_label = np.argsort(pred_scores)[:, -1]
pred_topk_label = np.argsort(pred_scores)[:, -k:] pred_topk_label = np.argsort(pred_scores)[:, -k:]
...@@ -263,10 +286,10 @@ class BaseClassifier(BaseAPI): ...@@ -263,10 +286,10 @@ class BaseClassifier(BaseAPI):
self.arrange_transforms( self.arrange_transforms(
transforms=self.test_transforms, mode='test') transforms=self.test_transforms, mode='test')
im = self.test_transforms(img_file) im = self.test_transforms(img_file)
result = self.exe.run( result = self.exe.run(self.test_prog,
self.test_prog, feed={'image': im},
feed={'image': im}, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values())) use_program_cache=True)
pred_label = np.argsort(result[0][0])[::-1][:true_topk] pred_label = np.argsort(result[0][0])[::-1][:true_topk]
res = [{ res = [{
'category_id': l, 'category_id': l,
...@@ -400,3 +423,16 @@ class ShuffleNetV2(BaseClassifier): ...@@ -400,3 +423,16 @@ class ShuffleNetV2(BaseClassifier):
def __init__(self, num_classes=1000): def __init__(self, num_classes=1000):
super(ShuffleNetV2, self).__init__( super(ShuffleNetV2, self).__init__(
model_name='ShuffleNetV2', num_classes=num_classes) model_name='ShuffleNetV2', num_classes=num_classes)
class HRNet_W18(BaseClassifier):
    """Classifier that initializes BaseClassifier with model name 'HRNet_W18'.

    Args:
        num_classes (int): Number of classes. Defaults to 1000.
    """

    def __init__(self, num_classes=1000):
        super(HRNet_W18, self).__init__(
            model_name='HRNet_W18', num_classes=num_classes)
class AlexNet(BaseClassifier):
    """Classifier that initializes BaseClassifier with model name 'AlexNet'.

    Args:
        num_classes (int): Number of classes. Defaults to 1000.
        input_shape (list|tuple|None): Value stored as
            ``fixed_input_shape``. Defaults to None.
            NOTE(review): BaseClassifier.build_net appears to assert that
            this is not None for AlexNet and to read it as
            [width, height] -- confirm before relying on the default.
    """

    def __init__(self, num_classes=1000, input_shape=None):
        super(AlexNet, self).__init__(
            model_name='AlexNet', num_classes=num_classes)
        # Assigned after the base initializer, which sets
        # fixed_input_shape to None.
        self.fixed_input_shape = input_shape
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import os.path as osp import os.path as osp
...@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI): ...@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI):
if mode == 'train': if mode == 'train':
self.optimizer.minimize(model_out) self.optimizer.minimize(model_out)
outputs['loss'] = model_out outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else: else:
outputs['pred'] = model_out[0] outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1] outputs['logit'] = model_out[1]
...@@ -247,14 +242,16 @@ class DeepLabv3p(BaseAPI): ...@@ -247,14 +242,16 @@ class DeepLabv3p(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。 save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为字符串'CITYSCAPES',
则自动下载在CITYSCAPES数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。 learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。 lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
...@@ -336,18 +333,27 @@ class DeepLabv3p(BaseAPI): ...@@ -336,18 +333,27 @@ class DeepLabv3p(BaseAPI):
for step, data in tqdm.tqdm( for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps): enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]) images = np.array([d[0] for d in data])
labels = np.array([d[1] for d in data])
_, _, im_h, im_w = images.shape
labels = list()
for d in data:
padding_label = np.zeros(
(1, im_h, im_w)).astype('int64') + self.ignore_index
_, label_h, label_w = d[1].shape
padding_label[:, :label_h, :label_w] = d[1]
labels.append(padding_label)
labels = np.array(labels)
num_samples = images.shape[0] num_samples = images.shape[0]
if num_samples < batch_size: if num_samples < batch_size:
num_pad_samples = batch_size - num_samples num_pad_samples = batch_size - num_samples
pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
images = np.concatenate([images, pad_images]) images = np.concatenate([images, pad_images])
feed_data = {'image': images} feed_data = {'image': images}
outputs = self.exe.run( outputs = self.exe.run(self.parallel_test_prog,
self.parallel_test_prog, feed=feed_data,
feed=feed_data, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=True)
return_numpy=True)
pred = outputs[0] pred = outputs[0]
if num_samples < batch_size: if num_samples < batch_size:
pred = pred[0:num_samples] pred = pred[0:num_samples]
...@@ -364,8 +370,7 @@ class DeepLabv3p(BaseAPI): ...@@ -364,8 +370,7 @@ class DeepLabv3p(BaseAPI):
metrics = OrderedDict( metrics = OrderedDict(
zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
[miou, category_iou, macc, category_acc, [miou, category_iou, macc, category_acc, conf_mat.kappa()]))
conf_mat.kappa()]))
if return_details: if return_details:
eval_details = { eval_details = {
'confusion_matrix': conf_mat.confusion_matrix.tolist() 'confusion_matrix': conf_mat.confusion_matrix.tolist()
...@@ -394,10 +399,10 @@ class DeepLabv3p(BaseAPI): ...@@ -394,10 +399,10 @@ class DeepLabv3p(BaseAPI):
transforms=self.test_transforms, mode='test') transforms=self.test_transforms, mode='test')
im, im_info = self.test_transforms(im_file) im, im_info = self.test_transforms(im_file)
im = np.expand_dims(im, axis=0) im = np.expand_dims(im, axis=0)
result = self.exe.run( result = self.exe.run(self.test_prog,
self.test_prog, feed={'image': im},
feed={'image': im}, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values())) use_program_cache=True)
pred = result[0] pred = result[0]
pred = np.squeeze(pred).astype('uint8') pred = np.squeeze(pred).astype('uint8')
logit = result[1] logit = result[1]
...@@ -413,6 +418,6 @@ class DeepLabv3p(BaseAPI): ...@@ -413,6 +418,6 @@ class DeepLabv3p(BaseAPI):
pred = pred[0:h, 0:w] pred = pred[0:h, 0:w]
logit = logit[0:h, 0:w, :] logit = logit[0:h, 0:w, :]
else: else:
raise Exception("Unexpected info '{}' in im_info".format( raise Exception("Unexpected info '{}' in im_info".format(info[
info[0])) 0]))
return {'label_map': pred, 'score_map': logit} return {'label_map': pred, 'score_map': logit}
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
import paddlex
from collections import OrderedDict
from .deeplabv3p import DeepLabv3p
class FastSCNN(DeepLabv3p):
    """Build the Fast SCNN network and run training, evaluation, prediction
    and model export.

    Args:
        num_classes (int): Number of classes.
        use_bce_loss (bool): Whether to use BCE loss as the network loss.
            Only valid for two-class segmentation. May be combined with dice
            loss. Defaults to False.
        use_dice_loss (bool): Whether to use dice loss as the network loss.
            Only valid for two-class segmentation. May be combined with BCE
            loss. When both use_bce_loss and use_dice_loss are False, the
            cross-entropy loss is used. Defaults to False.
        class_weight (list/str): Per-class weights of the cross-entropy loss.
            When it is a list, its length must equal num_classes. When it is
            a str, class_weight.lower() must be 'dynamic'; the weights are
            then computed each round from the proportion of pixels of each
            class, each class weight being: class proportion * num_classes.
            With the default None every class has weight 1, i.e. the usual
            cross-entropy loss.
        ignore_index (int): Label value to ignore; pixels whose label equals
            ignore_index do not take part in the loss. Defaults to 255.
        multi_loss_weight (list): Loss weights of the branches. By default
            the loss is computed on one branch only, i.e. [1.0]. Losses on
            two or three branches are also supported, with the weights
            ordered as [fusion_branch_weight, higher_branch_weight,
            lower_branch_weight]: fusion_branch_weight is the loss weight of
            the branch obtained by fusing the spatial-detail branch and the
            global-context branch, higher_branch_weight is the loss weight of
            the spatial-detail branch and lower_branch_weight is the loss
            weight of the global-context branch. When higher_branch_weight
            and lower_branch_weight are not given, no loss is computed on
            those two branches.

    Raises:
        ValueError: use_bce_loss or use_dice_loss is true and
            num_classes > 2.
        ValueError: class_weight is a list whose length differs from
            num_classes, or class_weight is a str whose lower-cased value is
            not 'dynamic'.
        TypeError: class_weight is not None and is neither a list nor a str.
        TypeError: multi_loss_weight is not a list.
        ValueError: multi_loss_weight is a list that is empty or has more
            than 3 elements.
    """

    def __init__(self,
                 num_classes=2,
                 use_bce_loss=False,
                 use_dice_loss=False,
                 class_weight=None,
                 ignore_index=255,
                 multi_loss_weight=[1.0]):
        # Must stay the first statement: it snapshots the constructor
        # arguments before any other local name is bound.
        self.init_params = locals()
        # Starting the lookup *after* DeepLabv3p skips DeepLabv3p.__init__
        # on purpose; the next class in the MRO receives 'segmenter'.
        super(DeepLabv3p, self).__init__('segmenter')
        # dice loss and BCE loss only apply to two-class segmentation.
        if num_classes > 2 and (use_bce_loss or use_dice_loss):
            raise ValueError(
                "dice loss and bce loss is only applicable to binary classfication"
            )
        if class_weight is not None:
            if isinstance(class_weight, list):
                if len(class_weight) != num_classes:
                    raise ValueError(
                        "Length of class_weight should be equal to number of classes"
                    )
            elif isinstance(class_weight, str):
                if class_weight.lower() != 'dynamic':
                    raise ValueError(
                        "if class_weight is string, must be dynamic!")
            else:
                raise TypeError(
                    'Expect class_weight is a list or string but receive {}'.
                    format(type(class_weight)))
        if not isinstance(multi_loss_weight, list):
            raise TypeError(
                'Expect multi_loss_weight is a list but receive {}'.format(
                    type(multi_loss_weight)))
        # A length can never be negative, so the former `< 0` test let an
        # empty list through; reject lengths outside 1..3 as the message says.
        if len(multi_loss_weight) < 1 or len(multi_loss_weight) > 3:
            raise ValueError(
                "Length of multi_loss_weight should be lower than or equal to 3 but greater than 0."
            )
        self.num_classes = num_classes
        self.use_bce_loss = use_bce_loss
        self.use_dice_loss = use_dice_loss
        self.class_weight = class_weight
        # Keep a private copy so instances never share (and cannot mutate)
        # the list object used as the default argument.
        self.multi_loss_weight = list(multi_loss_weight)
        self.ignore_index = ignore_index
        self.labels = None
        self.fixed_input_shape = None

    def build_net(self, mode='train'):
        """Create the Fast SCNN network for the given mode.

        Args:
            mode (str): Passed through to the network constructor. For
                'train' the loss is minimized with self.optimizer; any other
                value yields prediction outputs. Defaults to 'train'.

        Returns:
            tuple: (inputs, outputs). `inputs` is whatever
                model.generate_inputs() returns. `outputs` is an OrderedDict
                with the single key 'loss' in train mode, otherwise with the
                keys 'pred' and 'logit' in that order.
        """
        model = paddlex.cv.nets.segmentation.FastSCNN(
            self.num_classes,
            mode=mode,
            use_bce_loss=self.use_bce_loss,
            use_dice_loss=self.use_dice_loss,
            class_weight=self.class_weight,
            ignore_index=self.ignore_index,
            multi_loss_weight=self.multi_loss_weight,
            fixed_input_shape=self.fixed_input_shape)
        inputs = model.generate_inputs()
        model_out = model.build_net(inputs)
        outputs = OrderedDict()
        if mode == 'train':
            # In train mode the network output is the loss itself.
            self.optimizer.minimize(model_out)
            outputs['loss'] = model_out
        else:
            outputs['pred'] = model_out[0]
            outputs['logit'] = model_out[1]
        return inputs, outputs

    def train(self,
              num_epochs,
              train_dataset,
              train_batch_size=2,
              eval_dataset=None,
              save_interval_epochs=1,
              log_interval_steps=2,
              save_dir='output',
              pretrain_weights='CITYSCAPES',
              optimizer=None,
              learning_rate=0.01,
              lr_decay_power=0.9,
              use_vdl=False,
              sensitivities_file=None,
              eval_metric_loss=0.05,
              early_stop=False,
              early_stop_patience=5,
              resume_checkpoint=None):
        """Train the model.

        Only the default of `pretrain_weights` is specific to this class; all
        arguments are forwarded positionally to the parent `train`.

        Args:
            num_epochs (int): Number of training epochs.
            train_dataset (paddlex.datasets): Training data reader.
            train_batch_size (int): Batch size of the training data, also
                used as the batch size of the evaluation data. Defaults to 2.
            eval_dataset (paddlex.datasets): Evaluation data reader.
            save_interval_epochs (int): Interval, in epochs, between model
                saves. Defaults to 1.
            log_interval_steps (int): Interval, in iterations, between
                training log outputs. Defaults to 2.
            save_dir (str): Directory where the model is saved. Defaults to
                'output'.
            pretrain_weights (str): When it is a path, the pretrained model
                under that path is loaded; when it is the string
                'CITYSCAPES', weights pretrained on the CITYSCAPES image data
                are downloaded automatically; when it is None, no pretrained
                model is used. Defaults to 'CITYSCAPES'.
            optimizer (paddle.fluid.optimizer): Optimizer. When None, the
                default optimizer is used: fluid.optimizer.Momentum with a
                polynomial learning-rate decay schedule.
            learning_rate (float): Initial learning rate of the default
                optimizer. Defaults to 0.01.
            lr_decay_power (float): Power of the polynomial learning-rate
                decay of the default optimizer. Defaults to 0.9.
            use_vdl (bool): Whether to use VisualDL for visualization.
                Defaults to False.
            sensitivities_file (str): When it is a path, the sensitivity
                information under that path is loaded for pruning; when it is
                the string 'DEFAULT', sensitivity information obtained on the
                Cityscapes image data is downloaded automatically for
                pruning; when it is None, no pruning is done. Defaults to
                None.
            eval_metric_loss (float): Tolerable loss of accuracy. Defaults to
                0.05.
            early_stop (bool): Whether to use the early-stopping strategy.
                Defaults to False.
            early_stop_patience (int): With early stopping enabled, training
                stops when the validation accuracy keeps decreasing or stays
                flat for `early_stop_patience` epochs. Defaults to 5.
            resume_checkpoint (str): Path of the model saved by the previous
                training run, used to resume training. When None, training is
                not resumed. Defaults to None.

        Raises:
            ValueError: The model was loaded from an inference model.
        """
        return super(FastSCNN, self).train(
            num_epochs, train_dataset, train_batch_size, eval_dataset,
            save_interval_epochs, log_interval_steps, save_dir,
            pretrain_weights, optimizer, learning_rate, lr_decay_power,
            use_vdl, sensitivities_file, eval_metric_loss, early_stop,
            early_stop_patience, resume_checkpoint)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import math import math
...@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI): ...@@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI):
Args: Args:
num_classes (int): 包含了背景类的类别数。默认为81。 num_classes (int): 包含了背景类的类别数。默认为81。
backbone (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', backbone (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
with_fpn (bool): 是否使用FPN结构。默认为True。 with_fpn (bool): 是否使用FPN结构。默认为True。
aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
...@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI): ...@@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI):
self.init_params = locals() self.init_params = locals()
super(FasterRCNN, self).__init__('detector') super(FasterRCNN, self).__init__('detector')
backbones = [ backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
] ]
assert backbone in backbones, "backbone should be one of {}".format( assert backbone in backbones, "backbone should be one of {}".format(
backbones) backbones)
...@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI): ...@@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI):
layers = 101 layers = 101
variant = 'd' variant = 'd'
norm_type = 'affine_channel' norm_type = 'affine_channel'
elif backbone_name == 'HRNet_W18':
backbone = paddlex.cv.nets.hrnet.HRNet(
width=18, freeze_norm=True, norm_decay=0., freeze_at=0)
if self.with_fpn is False:
self.with_fpn = True
return backbone
if self.with_fpn: if self.with_fpn:
backbone = paddlex.cv.nets.resnet.ResNet( backbone = paddlex.cv.nets.resnet.ResNet(
norm_type='bn' if norm_type is None else norm_type, norm_type='bn' if norm_type is None else norm_type,
...@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI): ...@@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI):
model_out = model.build_net(inputs) model_out = model.build_net(inputs)
loss = model_out['loss'] loss = model_out['loss']
self.optimizer.minimize(loss) self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']), outputs = OrderedDict(
('loss_cls', model_out['loss_cls']), [('loss', model_out['loss']),
('loss_bbox', model_out['loss_bbox']), ('loss_cls', model_out['loss_cls']),
('loss_rpn_cls', model_out['loss_rpn_cls']), ('loss_bbox', model_out['loss_bbox']),
('loss_rpn_bbox', ('loss_rpn_cls', model_out['loss_rpn_cls']), (
model_out['loss_rpn_bbox'])]) 'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else: else:
outputs = model.build_net(inputs) outputs = model.build_net(inputs)
return inputs, outputs return inputs, outputs
...@@ -131,8 +138,16 @@ class FasterRCNN(BaseAPI): ...@@ -131,8 +138,16 @@ class FasterRCNN(BaseAPI):
lr_decay_epochs, lr_decay_gamma, lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch): num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_steps should less than {}".format( logging.error(
lr_decay_epochs[0] * num_steps_each_epoch)) "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)] for i in range(len(lr_decay_epochs) + 1)]
...@@ -181,7 +196,8 @@ class FasterRCNN(BaseAPI): ...@@ -181,7 +196,8 @@ class FasterRCNN(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。 save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的初始学习率。默认为0.0025。 learning_rate (float): 默认优化器的初始学习率。默认为0.0025。
...@@ -227,7 +243,9 @@ class FasterRCNN(BaseAPI): ...@@ -227,7 +243,9 @@ class FasterRCNN(BaseAPI):
# 构建训练、验证、测试网络 # 构建训练、验证、测试网络
self.build_program() self.build_program()
fuse_bn = True fuse_bn = True
if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: if self.with_fpn and self.backbone in [
'ResNet18', 'ResNet50', 'HRNet_W18'
]:
fuse_bn = False fuse_bn = False
self.net_initialize( self.net_initialize(
startup_prog=fluid.default_startup_program(), startup_prog=fluid.default_startup_program(),
...@@ -273,8 +291,7 @@ class FasterRCNN(BaseAPI): ...@@ -273,8 +291,7 @@ class FasterRCNN(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
""" """
self.arrange_transforms( self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
transforms=eval_dataset.transforms, mode='eval')
if metric is None: if metric is None:
if hasattr(self, 'metric') and self.metric is not None: if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric metric = self.metric
...@@ -293,14 +310,12 @@ class FasterRCNN(BaseAPI): ...@@ -293,14 +310,12 @@ class FasterRCNN(BaseAPI):
logging.warning( logging.warning(
"Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1." "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
) )
dataset = eval_dataset.generator( dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False)
batch_size=batch_size, drop_last=False)
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
results = list() results = list()
logging.info( logging.info("Start to evaluating(total_samples={}, total_steps={})...".
"Start to evaluating(total_samples={}, total_steps={})...".format( format(eval_dataset.num_samples, total_steps))
eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps): for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32') images = np.array([d[0] for d in data]).astype('float32')
im_infos = np.array([d[1] for d in data]).astype('float32') im_infos = np.array([d[1] for d in data]).astype('float32')
...@@ -310,11 +325,10 @@ class FasterRCNN(BaseAPI): ...@@ -310,11 +325,10 @@ class FasterRCNN(BaseAPI):
'im_info': im_infos, 'im_info': im_infos,
'im_shape': im_shapes, 'im_shape': im_shapes,
} }
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed=[feed_data],
feed=[feed_data], fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False)
return_numpy=False)
res = { res = {
'bbox': (np.array(outputs[0]), 'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()) outputs[0].recursive_sequence_lengths())
...@@ -339,13 +353,13 @@ class FasterRCNN(BaseAPI): ...@@ -339,13 +353,13 @@ class FasterRCNN(BaseAPI):
res['is_difficult'] = (np.array(res_is_difficult), res['is_difficult'] = (np.array(res_is_difficult),
[res_is_difficult_lod]) [res_is_difficult_lod])
results.append(res) results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format( logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
epoch_id, step + 1, total_steps)) 1, total_steps))
box_ap_stats, eval_details = eval_results( box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=True) results, metric, eval_dataset.coco_gt, with_background=True)
metrics = OrderedDict( metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], zip(['bbox_mmap'
box_ap_stats)) if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details: if return_details:
return metrics, eval_details return metrics, eval_details
return metrics return metrics
...@@ -359,7 +373,8 @@ class FasterRCNN(BaseAPI): ...@@ -359,7 +373,8 @@ class FasterRCNN(BaseAPI):
Returns: Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、 list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
""" """
if transforms is None and not hasattr(self, 'test_transforms'): if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.") raise Exception("transforms need to be defined, now is None.")
...@@ -373,15 +388,15 @@ class FasterRCNN(BaseAPI): ...@@ -373,15 +388,15 @@ class FasterRCNN(BaseAPI):
im = np.expand_dims(im, axis=0) im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0) im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed={
feed={ 'image': im,
'image': im, 'im_info': im_resize_info,
'im_info': im_resize_info, 'im_shape': im_shape
'im_shape': im_shape },
}, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False,
return_numpy=False) use_program_cache=True)
res = { res = {
k: (np.array(v), v.recursive_sequence_lengths()) k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs) for k, v in zip(list(self.test_outputs.keys()), outputs)
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
import paddlex
from collections import OrderedDict
from .deeplabv3p import DeepLabv3p
class HRNet(DeepLabv3p):
    """Build the HRNet network and run training, evaluation, prediction and
    model export.

    Args:
        num_classes (int): Number of classes.
        width (int): Number of channels of the feature maps in the
            high-resolution branch. Defaults to 18. Allowed values are
            [18, 30, 32, 40, 44, 48, 60, 64].
        use_bce_loss (bool): Whether to use BCE loss as the network loss.
            Only valid for two-class segmentation. May be combined with dice
            loss. Defaults to False.
        use_dice_loss (bool): Whether to use dice loss as the network loss.
            Only valid for two-class segmentation. May be combined with BCE
            loss. When both use_bce_loss and use_dice_loss are False, the
            cross-entropy loss is used. Defaults to False.
        class_weight (list/str): Per-class weights of the cross-entropy loss.
            When it is a list, its length must equal num_classes. When it is
            a str, class_weight.lower() must be 'dynamic'; the weights are
            then computed each round from the proportion of pixels of each
            class, each class weight being: class proportion * num_classes.
            With the default None every class has weight 1, i.e. the usual
            cross-entropy loss.
        ignore_index (int): Label value to ignore; pixels whose label equals
            ignore_index do not take part in the loss. Defaults to 255.

    Raises:
        ValueError: use_bce_loss or use_dice_loss is true and
            num_classes > 2.
        ValueError: class_weight is a list whose length differs from
            num_classes, or class_weight is a str whose lower-cased value is
            not 'dynamic'.
        TypeError: class_weight is not None and is neither a list nor a str.
    """

    def __init__(self,
                 num_classes=2,
                 width=18,
                 use_bce_loss=False,
                 use_dice_loss=False,
                 class_weight=None,
                 ignore_index=255):
        # Snapshot of the constructor arguments; has to come before anything
        # else is bound in this scope.
        self.init_params = locals()
        # Lookup starts after DeepLabv3p, so DeepLabv3p.__init__ is skipped
        # and the next class in the MRO is initialised with 'segmenter'.
        super(DeepLabv3p, self).__init__('segmenter')

        # dice loss / BCE loss are restricted to two-class segmentation.
        if (use_bce_loss or use_dice_loss) and num_classes > 2:
            raise ValueError(
                "dice loss and bce loss is only applicable to binary classfication"
            )

        if class_weight is not None:
            # Reject unsupported types first, then validate by type.
            if not isinstance(class_weight, (list, str)):
                raise TypeError(
                    'Expect class_weight is a list or string but receive {}'.
                    format(type(class_weight)))
            if isinstance(class_weight, list):
                if len(class_weight) != num_classes:
                    raise ValueError(
                        "Length of class_weight should be equal to number of classes"
                    )
            elif class_weight.lower() != 'dynamic':
                raise ValueError(
                    "if class_weight is string, must be dynamic!")

        self.num_classes = num_classes
        self.width = width
        self.use_bce_loss = use_bce_loss
        self.use_dice_loss = use_dice_loss
        self.class_weight = class_weight
        self.ignore_index = ignore_index
        self.labels = None
        self.fixed_input_shape = None

    def build_net(self, mode='train'):
        """Create the HRNet segmentation network for the given mode.

        Args:
            mode (str): Passed through to the network constructor. For
                'train' the loss is minimized with self.optimizer; any other
                value yields prediction outputs. Defaults to 'train'.

        Returns:
            tuple: (inputs, outputs). `inputs` is whatever
                model.generate_inputs() returns. `outputs` is an OrderedDict
                with the single key 'loss' in train mode, otherwise with the
                keys 'pred' and 'logit' in that order.
        """
        net = paddlex.cv.nets.segmentation.HRNet(
            self.num_classes,
            width=self.width,
            mode=mode,
            use_bce_loss=self.use_bce_loss,
            use_dice_loss=self.use_dice_loss,
            class_weight=self.class_weight,
            ignore_index=self.ignore_index,
            fixed_input_shape=self.fixed_input_shape)
        inputs = net.generate_inputs()
        net_out = net.build_net(inputs)

        if mode == 'train':
            # In train mode the network output is the loss itself.
            self.optimizer.minimize(net_out)
            return inputs, OrderedDict([('loss', net_out)])
        return inputs, OrderedDict([('pred', net_out[0]),
                                    ('logit', net_out[1])])

    def default_optimizer(self,
                          learning_rate,
                          num_epochs,
                          num_steps_each_epoch,
                          lr_decay_power=0.9):
        """Build the optimizer used when the caller supplies none.

        Args:
            learning_rate (float): Initial learning rate.
            num_epochs (int): Number of training epochs.
            num_steps_each_epoch (int): Number of steps in one epoch.
            lr_decay_power (float): Power of the polynomial decay. Defaults
                to 0.9.

        Returns:
            fluid.optimizer.Momentum: Momentum optimizer (momentum 0.9, L2
                regularization coefficient 5e-04) whose learning rate follows
                a polynomial decay to 0 over num_epochs * num_steps_each_epoch
                steps.
        """
        total_decay_steps = num_epochs * num_steps_each_epoch
        scheduled_lr = fluid.layers.polynomial_decay(
            learning_rate,
            total_decay_steps,
            end_learning_rate=0,
            power=lr_decay_power)
        l2_decay = fluid.regularizer.L2Decay(regularization_coeff=5e-04)
        return fluid.optimizer.Momentum(
            scheduled_lr, momentum=0.9, regularization=l2_decay)

    def train(self,
              num_epochs,
              train_dataset,
              train_batch_size=2,
              eval_dataset=None,
              save_interval_epochs=1,
              log_interval_steps=2,
              save_dir='output',
              pretrain_weights='IMAGENET',
              optimizer=None,
              learning_rate=0.01,
              lr_decay_power=0.9,
              use_vdl=False,
              sensitivities_file=None,
              eval_metric_loss=0.05,
              early_stop=False,
              early_stop_patience=5,
              resume_checkpoint=None):
        """Train the model.

        All arguments are forwarded positionally, in declaration order, to
        the parent `train`.

        Args:
            num_epochs (int): Number of training epochs.
            train_dataset (paddlex.datasets): Training data reader.
            train_batch_size (int): Batch size of the training data, also
                used as the batch size of the evaluation data. Defaults to 2.
            eval_dataset (paddlex.datasets): Evaluation data reader.
            save_interval_epochs (int): Interval, in epochs, between model
                saves. Defaults to 1.
            log_interval_steps (int): Interval, in iterations, between
                training log outputs. Defaults to 2.
            save_dir (str): Directory where the model is saved. Defaults to
                'output'.
            pretrain_weights (str): When it is a path, the pretrained model
                under that path is loaded; when it is the string 'IMAGENET',
                weights pretrained on the IMAGENET image data are downloaded
                automatically; when it is the string 'CITYSCAPES', weights
                pretrained on the CITYSCAPES image data are downloaded
                automatically; when it is None, no pretrained model is used.
                Defaults to 'IMAGENET'.
            optimizer (paddle.fluid.optimizer): Optimizer. When None, the
                default optimizer is used: fluid.optimizer.Momentum with a
                polynomial learning-rate decay schedule.
            learning_rate (float): Initial learning rate of the default
                optimizer. Defaults to 0.01.
            lr_decay_power (float): Power of the polynomial learning-rate
                decay of the default optimizer. Defaults to 0.9.
            use_vdl (bool): Whether to use VisualDL for visualization.
                Defaults to False.
            sensitivities_file (str): When it is a path, the sensitivity
                information under that path is loaded for pruning; when it is
                the string 'DEFAULT', sensitivity information obtained on the
                Cityscapes image data is downloaded automatically for
                pruning; when it is None, no pruning is done. Defaults to
                None.
            eval_metric_loss (float): Tolerable loss of accuracy. Defaults to
                0.05.
            early_stop (bool): Whether to use the early-stopping strategy.
                Defaults to False.
            early_stop_patience (int): With early stopping enabled, training
                stops when the validation accuracy keeps decreasing or stays
                flat for `early_stop_patience` epochs. Defaults to 5.
            resume_checkpoint (str): Path of the model saved by the previous
                training run, used to resume training. When None, training is
                not resumed. Defaults to None.

        Raises:
            ValueError: The model was loaded from an inference model.
        """
        parent = super(HRNet, self)
        return parent.train(
            num_epochs,
            train_dataset,
            train_batch_size,
            eval_dataset,
            save_interval_epochs,
            log_interval_steps,
            save_dir,
            pretrain_weights,
            optimizer,
            learning_rate,
            lr_decay_power,
            use_vdl,
            sensitivities_file,
            eval_metric_loss,
            early_stop,
            early_stop_patience,
            resume_checkpoint)
...@@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None): ...@@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None):
if 'model_name' in info['_init_params']: if 'model_name' in info['_init_params']:
del info['_init_params']['model_name'] del info['_init_params']['model_name']
model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params']) model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params'])
model.fixed_input_shape = fixed_input_shape model.fixed_input_shape = fixed_input_shape
if '_Attributes' in info:
if 'fixed_input_shape' in info['_Attributes']:
fixed_input_shape = info['_Attributes']['fixed_input_shape']
if fixed_input_shape is not None:
logging.info("Model already has fixed_input_shape with {}".
format(fixed_input_shape))
model.fixed_input_shape = fixed_input_shape
if status == "Normal" or \ if status == "Normal" or \
status == "Prune" or status == "fluid.save": status == "Prune" or status == "fluid.save":
startup_prog = fluid.Program() startup_prog = fluid.Program()
...@@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None): ...@@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None):
model.model_type, info['Transforms'], info['BatchTransforms']) model.model_type, info['Transforms'], info['BatchTransforms'])
model.eval_transforms = copy.deepcopy(model.test_transforms) model.eval_transforms = copy.deepcopy(model.test_transforms)
else: else:
model.test_transforms = build_transforms( model.test_transforms = build_transforms(model.model_type,
model.model_type, info['Transforms'], to_rgb) info['Transforms'], to_rgb)
model.eval_transforms = copy.deepcopy(model.test_transforms) model.eval_transforms = copy.deepcopy(model.test_transforms)
if '_Attributes' in info: if '_Attributes' in info:
...@@ -107,20 +116,7 @@ def fix_input_shape(info, fixed_input_shape=None): ...@@ -107,20 +116,7 @@ def fix_input_shape(info, fixed_input_shape=None):
resize = {'ResizeByShort': {}} resize = {'ResizeByShort': {}}
padding = {'Padding': {}} padding = {'Padding': {}}
if info['_Attributes']['model_type'] == 'classifier': if info['_Attributes']['model_type'] == 'classifier':
crop_size = 0 pass
for transform in info['Transforms']:
if 'CenterCrop' in transform:
crop_size = transform['CenterCrop']['crop_size']
break
assert crop_size == fixed_input_shape[
0], "fixed_input_shape must == CenterCrop:crop_size:{}".format(
crop_size)
assert crop_size == fixed_input_shape[
1], "fixed_input_shape must == CenterCrop:crop_size:{}".format(
crop_size)
if crop_size == 0:
logging.warning(
"fixed_input_shape must == input shape when trainning")
else: else:
resize['ResizeByShort']['short_size'] = min(fixed_input_shape) resize['ResizeByShort']['short_size'] = min(fixed_input_shape)
resize['ResizeByShort']['max_size'] = max(fixed_input_shape) resize['ResizeByShort']['max_size'] = max(fixed_input_shape)
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import math import math
...@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN): ...@@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN):
Args: Args:
num_classes (int): 包含了背景类的类别数。默认为81。 num_classes (int): 包含了背景类的类别数。默认为81。
backbone (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', backbone (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50',
'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。
with_fpn (bool): 是否使用FPN结构。默认为True。 with_fpn (bool): 是否使用FPN结构。默认为True。
aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。
anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。
...@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN): ...@@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN):
anchor_sizes=[32, 64, 128, 256, 512]): anchor_sizes=[32, 64, 128, 256, 512]):
self.init_params = locals() self.init_params = locals()
backbones = [ backbones = [
'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd',
'HRNet_W18'
] ]
assert backbone in backbones, "backbone should be one of {}".format( assert backbone in backbones, "backbone should be one of {}".format(
backbones) backbones)
...@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN): ...@@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN):
model_out = model.build_net(inputs) model_out = model.build_net(inputs)
loss = model_out['loss'] loss = model_out['loss']
self.optimizer.minimize(loss) self.optimizer.minimize(loss)
outputs = OrderedDict([('loss', model_out['loss']), outputs = OrderedDict(
('loss_cls', model_out['loss_cls']), [('loss', model_out['loss']),
('loss_bbox', model_out['loss_bbox']), ('loss_cls', model_out['loss_cls']),
('loss_mask', model_out['loss_mask']), ('loss_bbox', model_out['loss_bbox']),
('loss_rpn_cls', model_out['loss_rpn_cls']), ('loss_mask', model_out['loss_mask']),
('loss_rpn_bbox', ('loss_rpn_cls', model_out['loss_rpn_cls']), (
model_out['loss_rpn_bbox'])]) 'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
else: else:
outputs = model.build_net(inputs) outputs = model.build_net(inputs)
return inputs, outputs return inputs, outputs
...@@ -96,8 +97,16 @@ class MaskRCNN(FasterRCNN): ...@@ -96,8 +97,16 @@ class MaskRCNN(FasterRCNN):
lr_decay_epochs, lr_decay_gamma, lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch): num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_step should less than {}".format( logging.error(
lr_decay_epochs[0] * num_steps_each_epoch)) "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)] for i in range(len(lr_decay_epochs) + 1)]
...@@ -146,7 +155,8 @@ class MaskRCNN(FasterRCNN): ...@@ -146,7 +155,8 @@ class MaskRCNN(FasterRCNN):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。 save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/800。 learning_rate (float): 默认优化器的学习率。默认为1.0/800。
...@@ -194,7 +204,9 @@ class MaskRCNN(FasterRCNN): ...@@ -194,7 +204,9 @@ class MaskRCNN(FasterRCNN):
# 构建训练、验证、测试网络 # 构建训练、验证、测试网络
self.build_program() self.build_program()
fuse_bn = True fuse_bn = True
if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: if self.with_fpn and self.backbone in [
'ResNet18', 'ResNet50', 'HRNet_W18'
]:
fuse_bn = False fuse_bn = False
self.net_initialize( self.net_initialize(
startup_prog=fluid.default_startup_program(), startup_prog=fluid.default_startup_program(),
...@@ -241,8 +253,7 @@ class MaskRCNN(FasterRCNN): ...@@ -241,8 +253,7 @@ class MaskRCNN(FasterRCNN):
预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、 预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、
预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。 预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。
""" """
self.arrange_transforms( self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
transforms=eval_dataset.transforms, mode='eval')
if metric is None: if metric is None:
if hasattr(self, 'metric') and self.metric is not None: if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric metric = self.metric
...@@ -263,9 +274,8 @@ class MaskRCNN(FasterRCNN): ...@@ -263,9 +274,8 @@ class MaskRCNN(FasterRCNN):
total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
results = list() results = list()
logging.info( logging.info("Start to evaluating(total_samples={}, total_steps={})...".
"Start to evaluating(total_samples={}, total_steps={})...".format( format(eval_dataset.num_samples, total_steps))
eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm( for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps): enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]).astype('float32') images = np.array([d[0] for d in data]).astype('float32')
...@@ -276,11 +286,10 @@ class MaskRCNN(FasterRCNN): ...@@ -276,11 +286,10 @@ class MaskRCNN(FasterRCNN):
'im_info': im_infos, 'im_info': im_infos,
'im_shape': im_shapes, 'im_shape': im_shapes,
} }
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed=[feed_data],
feed=[feed_data], fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False)
return_numpy=False)
res = { res = {
'bbox': (np.array(outputs[0]), 'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()), outputs[0].recursive_sequence_lengths()),
...@@ -292,8 +301,8 @@ class MaskRCNN(FasterRCNN): ...@@ -292,8 +301,8 @@ class MaskRCNN(FasterRCNN):
res['im_shape'] = (im_shapes, []) res['im_shape'] = (im_shapes, [])
res['im_id'] = (np.array(res_im_id), []) res['im_id'] = (np.array(res_im_id), [])
results.append(res) results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format( logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
epoch_id, step + 1, total_steps)) 1, total_steps))
ap_stats, eval_details = eval_results( ap_stats, eval_details = eval_results(
results, results,
...@@ -302,17 +311,16 @@ class MaskRCNN(FasterRCNN): ...@@ -302,17 +311,16 @@ class MaskRCNN(FasterRCNN):
with_background=True, with_background=True,
resolution=self.mask_head_resolution) resolution=self.mask_head_resolution)
if metric == 'VOC': if metric == 'VOC':
if isinstance(ap_stats[0], np.ndarray) and isinstance( if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
ap_stats[1], np.ndarray): np.ndarray):
metrics = OrderedDict( metrics = OrderedDict(
zip(['bbox_map', 'segm_map'], zip(['bbox_map', 'segm_map'],
[ap_stats[0][1], ap_stats[1][1]])) [ap_stats[0][1], ap_stats[1][1]]))
else: else:
metrics = OrderedDict( metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
elif metric == 'COCO': elif metric == 'COCO':
if isinstance(ap_stats[0], np.ndarray) and isinstance( if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
ap_stats[1], np.ndarray): np.ndarray):
metrics = OrderedDict( metrics = OrderedDict(
zip(['bbox_mmap', 'segm_mmap'], zip(['bbox_mmap', 'segm_mmap'],
[ap_stats[0][0], ap_stats[1][0]])) [ap_stats[0][0], ap_stats[1][0]]))
...@@ -331,8 +339,10 @@ class MaskRCNN(FasterRCNN): ...@@ -331,8 +339,10 @@ class MaskRCNN(FasterRCNN):
transforms (paddlex.det.transforms): 数据预处理操作。 transforms (paddlex.det.transforms): 数据预处理操作。
Returns: Returns:
dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、 dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
预测框得分组成。 预测框坐标(坐标格式为[xmin, ymin, w, h])、
原图大小的预测二值图(1表示预测框类别,0表示背景类)、
预测框得分组成。
""" """
if transforms is None and not hasattr(self, 'test_transforms'): if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.") raise Exception("transforms need to be defined, now is None.")
...@@ -346,15 +356,15 @@ class MaskRCNN(FasterRCNN): ...@@ -346,15 +356,15 @@ class MaskRCNN(FasterRCNN):
im = np.expand_dims(im, axis=0) im = np.expand_dims(im, axis=0)
im_resize_info = np.expand_dims(im_resize_info, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0)
im_shape = np.expand_dims(im_shape, axis=0) im_shape = np.expand_dims(im_shape, axis=0)
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed={
feed={ 'image': im,
'image': im, 'im_info': im_resize_info,
'im_info': im_resize_info, 'im_shape': im_shape
'im_shape': im_shape },
}, fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False,
return_numpy=False) use_program_cache=True)
res = { res = {
k: (np.array(v), v.recursive_sequence_lengths()) k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs) for k, v in zip(list(self.test_outputs.keys()), outputs)
...@@ -368,8 +378,8 @@ class MaskRCNN(FasterRCNN): ...@@ -368,8 +378,8 @@ class MaskRCNN(FasterRCNN):
import pycocotools.mask as mask_util import pycocotools.mask as mask_util
for index, xywh_res in enumerate(xywh_results): for index, xywh_res in enumerate(xywh_results):
del xywh_res['image_id'] del xywh_res['image_id']
xywh_res['mask'] = mask_util.decode( xywh_res['mask'] = mask_util.decode(segm_results[index][
segm_results[index]['segmentation']) 'segmentation'])
xywh_res['category'] = self.labels[xywh_res['category_id']] xywh_res['category'] = self.labels[xywh_res['category_id']]
results.append(xywh_res) results.append(xywh_res)
return results return results
...@@ -66,16 +66,15 @@ def sensitivity(program, ...@@ -66,16 +66,15 @@ def sensitivity(program,
progress = "%.2f%%" % (progress * 100) progress = "%.2f%%" % (progress * 100)
logging.info( logging.info(
"Total evaluate iters={}, current={}, progress={}, eta={}". "Total evaluate iters={}, current={}, progress={}, eta={}".
format( format(total_evaluate_iters, current_iter, progress,
total_evaluate_iters, current_iter, progress, seconds_to_hms(
seconds_to_hms( int(cost * (total_evaluate_iters - current_iter)))),
int(cost * (total_evaluate_iters - current_iter)))),
use_color=True) use_color=True)
current_iter += 1 current_iter += 1
pruner = Pruner() pruner = Pruner()
logging.info("sensitive - param: {}; ratios: {}".format( logging.info("sensitive - param: {}; ratios: {}".format(name,
name, ratio)) ratio))
pruned_program, param_backup, _ = pruner.prune( pruned_program, param_backup, _ = pruner.prune(
program=graph.program, program=graph.program,
scope=scope, scope=scope,
...@@ -87,8 +86,8 @@ def sensitivity(program, ...@@ -87,8 +86,8 @@ def sensitivity(program,
param_backup=True) param_backup=True)
pruned_metric = eval_func(pruned_program) pruned_metric = eval_func(pruned_program)
loss = (baseline - pruned_metric) / baseline loss = (baseline - pruned_metric) / baseline
logging.info("pruned param: {}; {}; loss={}".format( logging.info("pruned param: {}; {}; loss={}".format(name, ratio,
name, ratio, loss)) loss))
sensitivities[name][ratio] = loss sensitivities[name][ratio] = loss
...@@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False): ...@@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
Returns: Returns:
paddle.fluid.Program: 裁剪后的Program。 paddle.fluid.Program: 裁剪后的Program。
""" """
prog_var_shape_dict = {}
for var in program.list_vars():
try:
prog_var_shape_dict[var.name] = var.shape
except Exception:
pass
index = 0
for param, ratio in zip(prune_names, prune_ratios):
origin_num = prog_var_shape_dict[param][0]
pruned_num = int(round(origin_num * ratio))
while origin_num == pruned_num:
ratio -= 0.1
pruned_num = int(round(origin_num * (ratio)))
prune_ratios[index] = ratio
index += 1
scope = fluid.global_scope() scope = fluid.global_scope()
pruner = Pruner() pruner = Pruner()
program, _, _ = pruner.prune( program, _, _ = pruner.prune(
...@@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8): ...@@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8):
其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。 其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。
""" """
if os.path.exists(save_file):
os.remove(save_file)
prune_names = get_prune_params(model) prune_names = get_prune_params(model)
def eval_for_prune(program): def eval_for_prune(program):
...@@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05): ...@@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05):
""" """
prune_params_ratios = get_params_ratios(sensitivities_file, prune_params_ratios = get_params_ratios(sensitivities_file,
eval_metric_loss) eval_metric_loss)
prog_var_shape_dict = {}
for var in program.list_vars():
try:
prog_var_shape_dict[var.name] = var.shape
except Exception:
pass
for param, ratio in prune_params_ratios.items():
origin_num = prog_var_shape_dict[param][0]
pruned_num = int(round(origin_num * ratio))
while origin_num == pruned_num:
ratio -= 0.1
pruned_num = int(round(origin_num * (ratio)))
prune_params_ratios[param] = ratio
prune_program = channel_prune( prune_program = channel_prune(
program, program,
list(prune_params_ratios.keys()), list(prune_params_ratios.keys()),
......
...@@ -142,13 +142,16 @@ def get_prune_params(model): ...@@ -142,13 +142,16 @@ def get_prune_params(model):
program = model.test_prog program = model.test_prog
if model_type.startswith('ResNet') or \ if model_type.startswith('ResNet') or \
model_type.startswith('DenseNet') or \ model_type.startswith('DenseNet') or \
model_type.startswith('DarkNet'): model_type.startswith('DarkNet') or \
model_type.startswith('AlexNet'):
for block in program.blocks: for block in program.blocks:
for param in block.all_parameters(): for param in block.all_parameters():
pd_var = fluid.global_scope().find_var(param.name) pd_var = fluid.global_scope().find_var(param.name)
pd_param = pd_var.get_tensor() pd_param = pd_var.get_tensor()
if len(np.array(pd_param).shape) == 4: if len(np.array(pd_param).shape) == 4:
prune_names.append(param.name) prune_names.append(param.name)
if model_type == 'AlexNet':
prune_names.remove('conv5_weights')
elif model_type == "MobileNetV1": elif model_type == "MobileNetV1":
prune_names.append("conv1_weights") prune_names.append("conv1_weights")
for param in program.global_block().all_parameters(): for param in program.global_block().all_parameters():
...@@ -162,7 +165,7 @@ def get_prune_params(model): ...@@ -162,7 +165,7 @@ def get_prune_params(model):
continue continue
prune_names.append(param.name) prune_names.append(param.name)
elif model_type.startswith("MobileNetV3"): elif model_type.startswith("MobileNetV3"):
if model_type == 'MobileNetV3_small': if model_type.startswith('MobileNetV3_small'):
expand_prune_id = [3, 4] expand_prune_id = [3, 4]
else: else:
expand_prune_id = [2, 3, 4, 8, 9, 11] expand_prune_id = [2, 3, 4, 8, 9, 11]
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import paddlex import paddlex
...@@ -95,11 +95,6 @@ class UNet(DeepLabv3p): ...@@ -95,11 +95,6 @@ class UNet(DeepLabv3p):
if mode == 'train': if mode == 'train':
self.optimizer.minimize(model_out) self.optimizer.minimize(model_out)
outputs['loss'] = model_out outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else: else:
outputs['pred'] = model_out[0] outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1] outputs['logit'] = model_out[1]
...@@ -141,7 +136,7 @@ class UNet(DeepLabv3p): ...@@ -141,7 +136,7 @@ class UNet(DeepLabv3p):
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
......
import paddlex import paddlex
import paddlex.utils.logging as logging
import paddlehub as hub import paddlehub as hub
import os import os
import os.path as osp import os.path as osp
...@@ -56,19 +57,120 @@ image_pretrain = { ...@@ -56,19 +57,120 @@ image_pretrain = {
'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar', 'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar',
'ShuffleNetV2': 'ShuffleNetV2':
'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar', 'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar',
'HRNet_W18':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar',
'HRNet_W30':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W30_C_pretrained.tar',
'HRNet_W32':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W32_C_pretrained.tar',
'HRNet_W40':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W40_C_pretrained.tar',
'HRNet_W48':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar',
'HRNet_W60':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar',
'HRNet_W64':
'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar',
'AlexNet':
'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar'
} }
coco_pretrain = { coco_pretrain = {
'UNet': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz' 'YOLOv3_DarkNet53_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar',
'YOLOv3_MobileNetV1_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar',
'YOLOv3_MobileNetV3_large_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams',
'YOLOv3_ResNet34_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar',
'YOLOv3_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar',
'FasterRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar',
'FasterRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar',
'FasterRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar',
'FasterRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar',
'FasterRCNN_HRNet_W18_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar',
'MaskRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar',
'MaskRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar',
'MaskRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar',
'MaskRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar',
'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz',
'DeepLabv3p_MobileNetV2_x1.0_COCO':
'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz',
'DeepLabv3p_Xception65_COCO':
'https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz'
}
cityscapes_pretrain = {
'DeepLabv3p_MobileNetV2_x1.0_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz',
'DeepLabv3p_Xception65_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz',
'HRNet_W18_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz',
'FastSCNN_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar'
} }
def get_pretrain_weights(flag, model_type, backbone, save_dir): def get_pretrain_weights(flag, class_name, backbone, save_dir):
if flag is None: if flag is None:
return None return None
elif osp.isdir(flag): elif osp.isdir(flag):
return flag return flag
elif osp.isfile(flag):
return flag
warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}"
if flag == 'COCO':
if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \
class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \
class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'HRNet':
logging.warning(warning_info.format(class_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
elif flag == 'CITYSCAPES':
model_name = '{}_{}'.format(class_name, backbone)
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
if class_name == 'HRNet' and backbone.split('_')[
-1] in ['W30', 'W32', 'W40', 'W48', 'W60', 'W64']:
logging.warning(warning_info.format(backbone, flag, 'IMAGENET'))
flag = 'IMAGENET'
if class_name == 'DeepLabv3p' and backbone in [
'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
]:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif flag == 'IMAGENET': elif flag == 'IMAGENET':
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
if flag == 'IMAGENET':
new_save_dir = save_dir new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'): if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir new_save_dir = paddlex.pretrain_dir
...@@ -80,15 +182,17 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir): ...@@ -80,15 +182,17 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
backbone = 'MobileNetV3_small_x1_0_ssld' backbone = 'MobileNetV3_small_x1_0_ssld'
elif backbone == 'MobileNetV3_large_ssld': elif backbone == 'MobileNetV3_large_ssld':
backbone = 'MobileNetV3_large_x1_0_ssld' backbone = 'MobileNetV3_large_x1_0_ssld'
if model_type == 'detector': if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']:
if backbone == 'ResNet50': if backbone == 'ResNet50':
backbone = 'DetResNet50' backbone = 'DetResNet50'
assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format( assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
backbone) backbone)
# url = image_pretrain[backbone]
# fname = osp.split(url)[-1].split('.')[0] # if backbone == 'AlexNet':
# paddlex.utils.download_and_decompress(url, path=new_save_dir) # url = image_pretrain[backbone]
# return osp.join(new_save_dir, fname) # fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
try: try:
hub.download(backbone, save_path=new_save_dir) hub.download(backbone, save_path=new_save_dir)
except Exception as e: except Exception as e:
...@@ -103,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir): ...@@ -103,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
raise Exception( raise Exception(
"Unexpected error, please make sure paddlehub >= 1.6.2") "Unexpected error, please make sure paddlehub >= 1.6.2")
return osp.join(new_save_dir, backbone) return osp.join(new_save_dir, backbone)
elif flag == 'COCO': elif flag in ['COCO', 'CITYSCAPES']:
new_save_dir = save_dir new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'): if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir new_save_dir = paddlex.pretrain_dir
url = coco_pretrain[backbone] if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']:
backbone = '{}_{}'.format(class_name, backbone)
backbone = "{}_{}".format(backbone, flag)
if flag == 'COCO':
url = coco_pretrain[backbone]
elif flag == 'CITYSCAPES':
url = cityscapes_pretrain[backbone]
fname = osp.split(url)[-1].split('.')[0] fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir) # paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname) # return osp.join(new_save_dir, fname)
assert backbone in coco_pretrain, "There is not COCO pretrain weights for {}, you may try ImageNet.".format(
backbone)
try: try:
hub.download(backbone, save_path=new_save_dir) hub.download(backbone, save_path=new_save_dir)
except Exception as e: except Exception as e:
...@@ -130,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir): ...@@ -130,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
return osp.join(new_save_dir, backbone) return osp.join(new_save_dir, backbone)
else: else:
raise Exception( raise Exception(
"pretrain_weights need to be defined as directory path or `IMAGENET` or 'COCO' (download pretrain weights automatically)." "pretrain_weights need to be defined as directory path or 'IMAGENET' or 'COCO' or 'Cityscapes' (download pretrain weights automatically)."
) )
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
# -*- coding: utf-8 -*
import os import os
import cv2 import cv2
import colorsys import colorsys
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
import math import math
...@@ -128,8 +128,16 @@ class YOLOv3(BaseAPI): ...@@ -128,8 +128,16 @@ class YOLOv3(BaseAPI):
lr_decay_epochs, lr_decay_gamma, lr_decay_epochs, lr_decay_gamma,
num_steps_each_epoch): num_steps_each_epoch):
if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
raise Exception("warmup_steps should less than {}".format( logging.error(
lr_decay_epochs[0] * num_steps_each_epoch)) "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset",
exit=False)
logging.error(
"See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
exit=False)
logging.error(
"warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function".
format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps
// num_steps_each_epoch))
boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
values = [(lr_decay_gamma**i) * learning_rate values = [(lr_decay_gamma**i) * learning_rate
for i in range(len(lr_decay_epochs) + 1)] for i in range(len(lr_decay_epochs) + 1)]
...@@ -180,7 +188,8 @@ class YOLOv3(BaseAPI): ...@@ -180,7 +188,8 @@ class YOLOv3(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。 log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。
save_dir (str): 模型保存路径。默认值为'output'。 save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/8000。 learning_rate (float): 默认优化器的学习率。默认为1.0/8000。
...@@ -277,8 +286,7 @@ class YOLOv3(BaseAPI): ...@@ -277,8 +286,7 @@ class YOLOv3(BaseAPI):
eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、
预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。
""" """
self.arrange_transforms( self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval')
transforms=eval_dataset.transforms, mode='eval')
if metric is None: if metric is None:
if hasattr(self, 'metric') and self.metric is not None: if hasattr(self, 'metric') and self.metric is not None:
metric = self.metric metric = self.metric
...@@ -298,19 +306,17 @@ class YOLOv3(BaseAPI): ...@@ -298,19 +306,17 @@ class YOLOv3(BaseAPI):
data_generator = eval_dataset.generator( data_generator = eval_dataset.generator(
batch_size=batch_size, drop_last=False) batch_size=batch_size, drop_last=False)
logging.info( logging.info("Start to evaluating(total_samples={}, total_steps={})...".
"Start to evaluating(total_samples={}, total_steps={})...".format( format(eval_dataset.num_samples, total_steps))
eval_dataset.num_samples, total_steps))
for step, data in tqdm.tqdm( for step, data in tqdm.tqdm(
enumerate(data_generator()), total=total_steps): enumerate(data_generator()), total=total_steps):
images = np.array([d[0] for d in data]) images = np.array([d[0] for d in data])
im_sizes = np.array([d[1] for d in data]) im_sizes = np.array([d[1] for d in data])
feed_data = {'image': images, 'im_size': im_sizes} feed_data = {'image': images, 'im_size': im_sizes}
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed=[feed_data],
feed=[feed_data], fetch_list=list(self.test_outputs.values()),
fetch_list=list(self.test_outputs.values()), return_numpy=False)
return_numpy=False)
res = { res = {
'bbox': (np.array(outputs[0]), 'bbox': (np.array(outputs[0]),
outputs[0].recursive_sequence_lengths()) outputs[0].recursive_sequence_lengths())
...@@ -326,13 +332,13 @@ class YOLOv3(BaseAPI): ...@@ -326,13 +332,13 @@ class YOLOv3(BaseAPI):
res['gt_label'] = (res_gt_label, []) res['gt_label'] = (res_gt_label, [])
res['is_difficult'] = (res_is_difficult, []) res['is_difficult'] = (res_is_difficult, [])
results.append(res) results.append(res)
logging.debug("[EVAL] Epoch={}, Step={}/{}".format( logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
epoch_id, step + 1, total_steps)) 1, total_steps))
box_ap_stats, eval_details = eval_results( box_ap_stats, eval_details = eval_results(
results, metric, eval_dataset.coco_gt, with_background=False) results, metric, eval_dataset.coco_gt, with_background=False)
evaluate_metrics = OrderedDict( evaluate_metrics = OrderedDict(
zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], zip(['bbox_mmap'
box_ap_stats)) if metric == 'COCO' else 'bbox_map'], box_ap_stats))
if return_details: if return_details:
return evaluate_metrics, eval_details return evaluate_metrics, eval_details
return evaluate_metrics return evaluate_metrics
...@@ -346,7 +352,8 @@ class YOLOv3(BaseAPI): ...@@ -346,7 +352,8 @@ class YOLOv3(BaseAPI):
Returns: Returns:
list: 预测结果列表,每个预测结果由预测框类别标签、 list: 预测结果列表,每个预测结果由预测框类别标签、
预测框类别名称、预测框坐标、预测框得分组成。 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
预测框得分组成。
""" """
if transforms is None and not hasattr(self, 'test_transforms'): if transforms is None and not hasattr(self, 'test_transforms'):
raise Exception("transforms need to be defined, now is None.") raise Exception("transforms need to be defined, now is None.")
...@@ -359,14 +366,12 @@ class YOLOv3(BaseAPI): ...@@ -359,14 +366,12 @@ class YOLOv3(BaseAPI):
im, im_size = self.test_transforms(img_file) im, im_size = self.test_transforms(img_file)
im = np.expand_dims(im, axis=0) im = np.expand_dims(im, axis=0)
im_size = np.expand_dims(im_size, axis=0) im_size = np.expand_dims(im_size, axis=0)
outputs = self.exe.run( outputs = self.exe.run(self.test_prog,
self.test_prog, feed={'image': im,
feed={ 'im_size': im_size},
'image': im, fetch_list=list(self.test_outputs.values()),
'im_size': im_size return_numpy=False,
}, use_program_cache=True)
fetch_list=list(self.test_outputs.values()),
return_numpy=False)
res = { res = {
k: (np.array(v), v.recursive_sequence_lengths()) k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(list(self.test_outputs.keys()), outputs) for k, v in zip(list(self.test_outputs.keys()), outputs)
......
...@@ -20,9 +20,12 @@ from .mobilenet_v2 import MobileNetV2 ...@@ -20,9 +20,12 @@ from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3 from .mobilenet_v3 import MobileNetV3
from .segmentation import UNet from .segmentation import UNet
from .segmentation import DeepLabv3p from .segmentation import DeepLabv3p
from .segmentation import FastSCNN
from .xception import Xception from .xception import Xception
from .densenet import DenseNet from .densenet import DenseNet
from .shufflenet_v2 import ShuffleNetV2 from .shufflenet_v2 import ShuffleNetV2
from .hrnet import HRNet
from .alexnet import AlexNet
def resnet18(input, num_classes=1000): def resnet18(input, num_classes=1000):
...@@ -51,14 +54,20 @@ def resnet50_vd(input, num_classes=1000): ...@@ -51,14 +54,20 @@ def resnet50_vd(input, num_classes=1000):
def resnet50_vd_ssld(input, num_classes=1000): def resnet50_vd_ssld(input, num_classes=1000):
model = ResNet(layers=50, num_classes=num_classes, model = ResNet(
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) layers=50,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input) return model(input)
def resnet101_vd_ssld(input, num_classes=1000): def resnet101_vd_ssld(input, num_classes=1000):
model = ResNet(layers=101, num_classes=num_classes, model = ResNet(
variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) layers=101,
num_classes=num_classes,
variant='d',
lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3])
return model(input) return model(input)
...@@ -93,14 +102,18 @@ def mobilenetv3_large(input, num_classes=1000): ...@@ -93,14 +102,18 @@ def mobilenetv3_large(input, num_classes=1000):
def mobilenetv3_small_ssld(input, num_classes=1000): def mobilenetv3_small_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='small', model = MobileNetV3(
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) num_classes=num_classes,
model_name='small',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input) return model(input)
def mobilenetv3_large_ssld(input, num_classes=1000): def mobilenetv3_large_ssld(input, num_classes=1000):
model = MobileNetV3(num_classes=num_classes, model_name='large', model = MobileNetV3(
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) num_classes=num_classes,
model_name='large',
lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75])
return model(input) return model(input)
...@@ -133,6 +146,17 @@ def densenet201(input, num_classes=1000): ...@@ -133,6 +146,17 @@ def densenet201(input, num_classes=1000):
model = DenseNet(layers=201, num_classes=num_classes) model = DenseNet(layers=201, num_classes=num_classes)
return model(input) return model(input)
def shufflenetv2(input, num_classes=1000): def shufflenetv2(input, num_classes=1000):
model = ShuffleNetV2(num_classes=num_classes) model = ShuffleNetV2(num_classes=num_classes)
return model(input) return model(input)
def hrnet_w18(input, num_classes=1000):
    """Build an HRNet of width 18 and apply it to ``input``.

    Args:
        input: value passed unchanged to the ``HRNet`` instance call.
        num_classes (int): forwarded to the ``HRNet`` constructor.
            Defaults to 1000.

    Returns:
        Whatever calling the ``HRNet`` instance on ``input`` returns.
    """
    return HRNet(width=18, num_classes=num_classes)(input)
def alexnet(input, num_classes=1000):
    """Build an ``AlexNet`` and apply it to ``input``.

    Args:
        input: value passed unchanged to the ``AlexNet`` instance call.
        num_classes (int): forwarded to the ``AlexNet`` constructor.
            Defaults to 1000.

    Returns:
        Whatever calling the ``AlexNet`` instance on ``input`` returns.
    """
    return AlexNet(num_classes=num_classes)(input)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class AlexNet():
    """AlexNet classifier assembled from ``paddle.fluid`` layers.

    Five ReLU convolutions (3x3 / stride-2 max pooling follows conv1, conv2
    and conv5) feed two dropout + 4096-unit ReLU fully connected layers and
    a final ``num_classes``-way fully connected layer without activation.
    Every weight and bias uses a ``Uniform(-stdv, stdv)`` initializer with
    ``stdv = 1 / sqrt(fan_in)`` of its layer.

    Args:
        num_classes (int): output size of the last fully connected layer.
            Must not be None. Defaults to 1000.
    """

    def __init__(self, num_classes=1000):
        assert num_classes is not None, "In AlexNet, num_classes cannot be None"
        self.num_classes = num_classes

    @staticmethod
    def _uniform_attrs(fan_in, layer_name):
        """Return the ``bias_attr`` / ``param_attr`` keyword pair of a layer.

        Both attributes are initialised uniformly in
        ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]`` and named
        ``<layer_name>_offset`` and ``<layer_name>_weights``.
        """
        stdv = 1.0 / math.sqrt(fan_in)
        bias_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=layer_name + "_offset")
        param_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=layer_name + "_weights")
        return {'bias_attr': bias_attr, 'param_attr': param_attr}

    def _conv_relu(self, input, num_filters, filter_size, stride, padding,
                   layer_name):
        """ReLU convolution whose fan-in is in_channels * filter_size**2."""
        fan_in = input.shape[1] * filter_size * filter_size
        return fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=1,
            act='relu',
            **self._uniform_attrs(fan_in, layer_name))

    @staticmethod
    def _max_pool(input):
        """3x3 max pooling with stride 2 and no padding."""
        return fluid.layers.pool2d(
            input=input,
            pool_size=3,
            pool_stride=2,
            pool_padding=0,
            pool_type='max')

    def _fc(self, input, size, fan_in, layer_name, act=None):
        """Fully connected layer with the uniform initialisation above."""
        return fluid.layers.fc(
            input=input,
            size=size,
            act=act,
            **self._uniform_attrs(fan_in, layer_name))

    def __call__(self, input):
        """Append the AlexNet layers to the current program.

        Args:
            input: image variable; ``input.shape[1]`` is read as the number
                of input channels.

        Returns:
            Output of the last fully connected layer ("fc8"), which has
            ``self.num_classes`` units and no activation.
        """
        conv1 = self._conv_relu(input, 64, 11, 4, 2, "conv1")
        pool1 = self._max_pool(conv1)

        conv2 = self._conv_relu(pool1, 192, 5, 1, 2, "conv2")
        pool2 = self._max_pool(conv2)

        conv3 = self._conv_relu(pool2, 384, 3, 1, 1, "conv3")
        conv4 = self._conv_relu(conv3, 256, 3, 1, 1, "conv4")
        conv5 = self._conv_relu(conv4, 256, 3, 1, 1, "conv5")
        pool5 = self._max_pool(conv5)

        drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5)
        # fc6 sees the flattened feature map, so its fan-in is C * H * W.
        fc6 = self._fc(
            drop6,
            4096,
            drop6.shape[1] * drop6.shape[2] * drop6.shape[3] * 1.0,
            "fc6",
            act='relu')

        drop7 = fluid.layers.dropout(x=fc6, dropout_prob=0.5)
        fc7 = self._fc(drop7, 4096, drop7.shape[1] * 1.0, "fc7", act='relu')

        return self._fc(fc7, self.num_classes, fc7.shape[1] * 1.0, "fc8")
...@@ -68,13 +68,14 @@ class DarkNet(object): ...@@ -68,13 +68,14 @@ class DarkNet(object):
bias_attr=False) bias_attr=False)
bn_name = name + ".bn" bn_name = name + ".bn"
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(float(self.norm_decay))
bn_param_attr = ParamAttr( bn_param_attr = ParamAttr(
regularizer=L2Decay(float(self.norm_decay)), regularizer=regularizer, name=bn_name + '.scale')
name=bn_name + '.scale')
bn_bias_attr = ParamAttr( bn_bias_attr = ParamAttr(
regularizer=L2Decay(float(self.norm_decay)), regularizer=regularizer, name=bn_name + '.offset')
name=bn_name + '.offset')
out = fluid.layers.batch_norm( out = fluid.layers.batch_norm(
input=conv, input=conv,
...@@ -182,4 +183,4 @@ class DarkNet(object): ...@@ -182,4 +183,4 @@ class DarkNet(object):
bias_attr=ParamAttr(name='fc_offset')) bias_attr=ParamAttr(name='fc_offset'))
return out return out
return blocks return blocks
\ No newline at end of file
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
...@@ -173,4 +173,4 @@ class DenseNet(object): ...@@ -173,4 +173,4 @@ class DenseNet(object):
bn_ac_conv = fluid.layers.dropout( bn_ac_conv = fluid.layers.dropout(
x=bn_ac_conv, dropout_prob=dropout) x=bn_ac_conv, dropout_prob=dropout)
bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1) bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1)
return bn_ac_conv return bn_ac_conv
\ No newline at end of file
...@@ -21,7 +21,7 @@ import copy ...@@ -21,7 +21,7 @@ import copy
from paddle import fluid from paddle import fluid
from .fpn import FPN from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead) from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign) from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead) from .bbox_head import (BBoxHead, TwoFCHead)
...@@ -82,7 +82,12 @@ class FasterRCNN(object): ...@@ -82,7 +82,12 @@ class FasterRCNN(object):
self.backbone = backbone self.backbone = backbone
self.mode = mode self.mode = mode
if with_fpn and fpn is None: if with_fpn and fpn is None:
fpn = FPN() if self.backbone.__class__.__name__.startswith('HRNet'):
fpn = HRFPN()
fpn.min_level = 2
fpn.max_level = 6
else:
fpn = FPN()
self.fpn = fpn self.fpn = fpn
self.num_classes = num_classes self.num_classes = num_classes
if rpn_head is None: if rpn_head is None:
......
...@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr ...@@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
__all__ = ['FPN'] __all__ = ['FPN', 'HRFPN']
def ConvNorm(input, def ConvNorm(input,
...@@ -219,8 +219,8 @@ class FPN(object): ...@@ -219,8 +219,8 @@ class FPN(object):
body_name = body_name_list[i] body_name = body_name_list[i]
body_input = body_dict[body_name] body_input = body_dict[body_name]
top_output = self.fpn_inner_output[i - 1] top_output = self.fpn_inner_output[i - 1]
fpn_inner_single = self._add_topdown_lateral( fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
body_name, body_input, top_output) top_output)
self.fpn_inner_output[i] = fpn_inner_single self.fpn_inner_output[i] = fpn_inner_single
fpn_dict = {} fpn_dict = {}
fpn_name_list = [] fpn_name_list = []
...@@ -293,3 +293,107 @@ class FPN(object): ...@@ -293,3 +293,107 @@ class FPN(object):
spatial_scale.insert(0, spatial_scale[0] * 0.5) spatial_scale.insert(0, spatial_scale[0] * 0.5)
res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
return res_dict, spatial_scale return res_dict, spatial_scale
class HRFPN(object):
    """
    HRFPN, the feature pyramid built on top of HRNet outputs,
    see https://arxiv.org/abs/1908.07919

    Args:
        num_chan (int): number of feature channels
        pooling_type (str): pooling type of downsampling
        share_conv (bool): whether to share conv for different layers' reduction
        spatial_scale (list): feature map scaling factor
    """

    def __init__(
            self,
            num_chan=256,
            pooling_type="avg",
            share_conv=False,
            spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1. / 4], ):
        self.num_chan = num_chan
        self.pooling_type = pooling_type
        self.share_conv = share_conv
        # Keep a private copy: get_output() hands this list to the caller,
        # and the default argument is one list object shared by all calls,
        # so storing it by reference would let an in-place edit made through
        # one instance leak into every later instance.
        self.spatial_scale = list(spatial_scale)

    def get_output(self, body_dict):
        """Build the pyramid levels from the backbone outputs.

        Args:
            body_dict (dict): backbone outputs keyed by name. The first entry
                is the reference: every other entry is resized to its
                height/width before concatenation.

        Returns:
            tuple: ``(res_dict, spatial_scale)`` where ``res_dict`` is an
            OrderedDict of the pyramid levels in reversed order (last pooled
            level first) and ``spatial_scale`` is ``self.spatial_scale``.
        """
        num_out = len(self.spatial_scale)
        body_name_list = list(body_dict.keys())

        outs = []
        outs.append(body_dict[body_name_list[0]])

        # resize
        for i in range(1, len(body_dict)):
            resized = self.resize_input_tensor(body_dict[body_name_list[i]],
                                               outs[0], 2**i)
            outs.append(resized)

        # concat
        out = fluid.layers.concat(outs, axis=1)

        # reduction
        out = fluid.layers.conv2d(
            input=out,
            num_filters=self.num_chan,
            filter_size=1,
            stride=1,
            padding=0,
            param_attr=ParamAttr(name='hrfpn_reduction_weights'),
            bias_attr=False)

        # Level 0 is the reduced map itself; level i is that map pooled with
        # kernel and stride 2**i.
        outs = [out]
        for i in range(1, num_out):
            outs.append(
                self.pooling(
                    out,
                    size=2**i,
                    stride=2**i,
                    pooling_type=self.pooling_type))

        # conv
        outputs = []
        for i in range(num_out):
            # With share_conv every level uses the same parameter name and
            # therefore the same weights.
            conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str(
                i)
            conv = fluid.layers.conv2d(
                input=outs[i],
                num_filters=self.num_chan,
                filter_size=3,
                stride=1,
                padding=1,
                param_attr=ParamAttr(name=conv_name + "_weights"),
                bias_attr=False)
            outputs.append(conv)

        # Levels beyond the backbone outputs get synthetic names.
        for idx in range(0, num_out - len(body_name_list)):
            body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**(
                idx + 1)))

        outputs = outputs[::-1]
        body_name_list = body_name_list[::-1]

        res_dict = OrderedDict([(body_name_list[k], outputs[k])
                                for k in range(len(body_name_list))])
        return res_dict, self.spatial_scale

    def resize_input_tensor(self, body_input, ref_output, scale):
        """Bilinearly resize ``body_input`` to the height/width of ``ref_output``.

        Both ``scale`` and the runtime ``out_shape`` taken from
        ``ref_output`` are passed on to ``fluid.layers.resize_bilinear``.
        """
        shape = fluid.layers.shape(ref_output)
        shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
        out_shape_ = shape_hw
        out_shape = fluid.layers.cast(out_shape_, dtype='int32')
        out_shape.stop_gradient = True
        body_output = fluid.layers.resize_bilinear(
            body_input, scale=scale, out_shape=out_shape)
        return body_output

    def pooling(self, input, size, stride, pooling_type):
        """Apply ``fluid.layers.pool2d`` with the given kernel, stride and type."""
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=size,
            pool_stride=stride,
            pool_type=pooling_type)
        return pool
...@@ -21,7 +21,7 @@ import copy ...@@ -21,7 +21,7 @@ import copy
import paddle.fluid as fluid import paddle.fluid as fluid
from .fpn import FPN from .fpn import (FPN, HRFPN)
from .rpn_head import (RPNHead, FPNRPNHead) from .rpn_head import (RPNHead, FPNRPNHead)
from .roi_extractor import (RoIAlign, FPNRoIAlign) from .roi_extractor import (RoIAlign, FPNRoIAlign)
from .bbox_head import (BBoxHead, TwoFCHead) from .bbox_head import (BBoxHead, TwoFCHead)
...@@ -92,11 +92,15 @@ class MaskRCNN(object): ...@@ -92,11 +92,15 @@ class MaskRCNN(object):
self.backbone = backbone self.backbone = backbone
self.mode = mode self.mode = mode
if with_fpn and fpn is None: if with_fpn and fpn is None:
fpn = FPN( if self.backbone.__class__.__name__.startswith('HRNet'):
num_chan=num_chan, fpn = HRFPN()
min_level=min_level, fpn.min_level = 2
max_level=max_level, fpn.max_level = 6
spatial_scale=spatial_scale) else:
fpn = FPN(num_chan=num_chan,
min_level=min_level,
max_level=max_level,
spatial_scale=spatial_scale)
self.fpn = fpn self.fpn = fpn
self.num_classes = num_classes self.num_classes = num_classes
if rpn_head is None: if rpn_head is None:
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.framework import Variable
from paddle.fluid.regularizer import L2Decay
from numbers import Integral
from paddle.fluid.initializer import MSRA
import math
__all__ = ['HRNet']
class HRNet(object):
def __init__(self,
             width=40,
             has_se=False,
             freeze_at=0,
             norm_type='bn',
             freeze_norm=False,
             norm_decay=0.,
             feature_maps=[2, 3, 4, 5],
             num_classes=None):
    """Store the HRNet configuration.

    Args:
        width (int): key into the per-stage channel table; the table has
            entries for 18, 30, 32, 40, 44, 48, 60 and 64.
        has_se (bool): whether residual blocks add squeeze-and-excitation.
        freeze_at (int): must be within 0..4.
        norm_type (str): 'bn' or 'sync_bn'.
        freeze_norm (bool): stored as ``self.freeze_norm``.
        norm_decay (float): stored as ``self.norm_decay``.
        feature_maps: a single integer is wrapped into a one-element list;
            any other value must have a non-zero ``len``.
        num_classes (int|None): stored as ``self.num_classes``.
    """
    super(HRNet, self).__init__()

    if isinstance(feature_maps, Integral):
        feature_maps = [feature_maps]

    assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4"
    assert len(feature_maps) > 0, "need one or more feature maps"
    assert norm_type in ['bn', 'sync_bn']

    self.width = width
    self.has_se = has_se
    # For base width w, stages 2/3/4 use [w, 2w], [w, 2w, 4w] and
    # [w, 2w, 4w, 8w] channels on their branches.
    self.channels = {
        base: [[base * 2**k for k in range(n)] for n in (2, 3, 4)]
        for base in (18, 30, 32, 40, 44, 48, 60, 64)
    }

    self.freeze_at = freeze_at
    self.norm_type = norm_type
    self.norm_decay = norm_decay
    self.freeze_norm = freeze_norm
    self.feature_maps = feature_maps
    self.num_classes = num_classes
    self.end_points = []
def net(self, input):
    """Assemble the HRNet graph on top of ``input``.

    A stem of two stride-2 3x3 convolutions and ``layer1`` is followed by
    three transition + stage pairs (1, 4 and 3 modules respectively).

    Returns:
        * if ``self.num_classes`` is truthy: the output of the
          classification head's fully connected layer;
        * else if ``self.feature_maps == "stage4"``: the list of stage-4
          outputs;
        * otherwise the last stage-4 output, after storing the whole list in
          ``self.end_points``.
    """
    ch2, ch3, ch4 = self.channels[self.width]
    mods2, mods3, mods4 = 1, 4, 3

    # Stem: two stride-2 convolutions.
    stem = input
    for stem_name in ('layer1_1', 'layer1_2'):
        stem = self.conv_bn_layer(
            input=stem,
            filter_size=3,
            num_filters=64,
            stride=2,
            if_act=True,
            name=stem_name)

    feats = [self.layer1(stem, name='layer2')]
    prev_ch = [256]
    plan = (('tr1', 'st2', mods2, ch2),
            ('tr2', 'st3', mods3, ch3),
            ('tr3', 'st4', mods4, ch4))
    for tr_name, st_name, num_modules, out_ch in plan:
        feats = self.transition_layer(feats, prev_ch, out_ch, name=tr_name)
        feats = self.stage(feats, num_modules, out_ch, name=st_name)
        prev_ch = out_ch
    st4 = feats

    # classification
    if self.num_classes:
        heads = self.last_cls_out(x=st4, name='cls_head')
        y = heads[0]
        for idx, head_filters in enumerate([256, 512, 1024]):
            skip = heads[idx + 1]
            down = self.conv_bn_layer(
                input=y,
                filter_size=3,
                num_filters=head_filters,
                stride=2,
                name='cls_head_add' + str(idx + 1))
            y = fluid.layers.elementwise_add(skip, down)

        y = self.conv_bn_layer(
            input=y,
            filter_size=1,
            num_filters=2048,
            stride=1,
            name='cls_head_last_conv')
        pool = fluid.layers.pool2d(
            input=y, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        return fluid.layers.fc(
            input=pool,
            size=self.num_classes,
            param_attr=ParamAttr(
                name='fc_weights',
                initializer=fluid.initializer.Uniform(-stdv, stdv)),
            bias_attr=ParamAttr(name='fc_offset'))

    # segmentation
    if self.feature_maps == "stage4":
        return st4

    self.end_points = st4
    return st4[-1]
def layer1(self, input, name=None):
    """Chain four 64-filter bottleneck blocks.

    Only the first block is built with ``downsample=True``. Blocks are named
    ``<name>_1`` .. ``<name>_4``.

    Returns:
        Output of the fourth bottleneck block.
    """
    out = input
    for idx in range(1, 5):
        out = self.bottleneck_block(
            out,
            num_filters=64,
            downsample=(idx == 1),
            name=name + '_' + str(idx))
    return out
def transition_layer(self, x, in_channels, out_channels, name=None):
    """Adapt the branch list ``x`` to the channel layout of the next stage.

    For each output index ``i``:
      * an existing branch whose channel count already matches is passed
        through untouched;
      * an existing branch with a different channel count goes through a
        3x3 conv-bn layer;
      * an index beyond the existing branches gets a new branch made from
        the last input ``x[-1]`` with a stride-2 3x3 conv-bn layer.

    Created layers are named ``<name>_layer_<i+1>``.

    Returns:
        list with ``len(out_channels)`` entries.
    """
    existing = len(in_channels)
    result = []
    for idx, wanted in enumerate(out_channels):
        if idx >= existing:
            # New, lower-resolution branch.
            result.append(
                self.conv_bn_layer(
                    x[-1],
                    filter_size=3,
                    num_filters=wanted,
                    stride=2,
                    name=name + '_layer_' + str(idx + 1)))
        elif in_channels[idx] != wanted:
            result.append(
                self.conv_bn_layer(
                    x[idx],
                    filter_size=3,
                    num_filters=wanted,
                    name=name + '_layer_' + str(idx + 1)))
        else:
            result.append(x[idx])
    return result
def branches(self, x, block_num, channels, name=None):
    """Run ``block_num`` basic blocks on each branch independently.

    Branch ``i`` starts from ``x[i]`` and uses ``channels[i]`` filters; its
    ``j``-th block is named ``<name>_branch_layer_<i+1>_<j+1>``.

    Returns:
        list with one output per entry of ``channels``.
    """
    outputs = []
    for idx, branch_ch in enumerate(channels):
        feat = x[idx]
        for blk in range(1, block_num + 1):
            feat = self.basic_block(
                feat,
                branch_ch,
                name=name + '_branch_layer_' + str(idx + 1) + '_' + str(blk))
        outputs.append(feat)
    return outputs
def fuse_layers(self, x, channels, multi_scale_output=True, name=None):
    """Fuse the branches: each output sums all branches at its resolution.

    For output ``i`` and source branch ``j``:
      * ``j > i``: a 1x1 conv-bn (no activation) to ``channels[i]`` filters,
        then upsampling. When ``self.feature_maps == "stage4"`` this is a
        bilinear resize to the runtime height/width of ``x[i]``; otherwise
        a nearest-neighbour resize by ``2**(j - i)``;
      * ``j < i``: ``i - j`` stride-2 3x3 conv-bn layers; the last one has
        ``channels[i]`` filters and no activation, the earlier ones have
        ``channels[j]`` filters;
      * ``j == i``: the branch itself.
    The sum is passed through ReLU.

    Args:
        multi_scale_output (bool): when falsy only output 0 is produced.

    Returns:
        list of fused outputs.
    """
    num_branches = len(channels)
    fused = []
    for i in range(num_branches if multi_scale_output else 1):
        acc = x[i]
        if self.feature_maps == "stage4":
            # Index order is kept: each indexing may add an op to the graph.
            target_shape = fluid.layers.shape(acc)
            width = target_shape[-1]
            height = target_shape[-2]

        for j in range(num_branches):
            if j == i:
                continue

            if j > i:
                # Lower-resolution source: project, then upsample.
                y = self.conv_bn_layer(
                    x[j],
                    filter_size=1,
                    num_filters=channels[i],
                    if_act=False,
                    name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
                if self.feature_maps == "stage4":
                    y = fluid.layers.resize_bilinear(
                        input=y, out_shape=[height, width])
                else:
                    y = fluid.layers.resize_nearest(
                        input=y, scale=2**(j - i))
            else:
                # Higher-resolution source: repeated stride-2 convolutions.
                y = x[j]
                hops = i - j
                for k in range(hops):
                    hop_name = (name + '_layer_' + str(i + 1) + '_' +
                                str(j + 1) + '_' + str(k + 1))
                    if k == hops - 1:
                        y = self.conv_bn_layer(
                            y,
                            filter_size=3,
                            num_filters=channels[i],
                            stride=2,
                            if_act=False,
                            name=hop_name)
                    else:
                        y = self.conv_bn_layer(
                            y,
                            filter_size=3,
                            num_filters=channels[j],
                            stride=2,
                            name=hop_name)

            acc = fluid.layers.elementwise_add(x=acc, y=y, act=None)

        fused.append(fluid.layers.relu(acc))
    return fused
def high_resolution_module(self,
                           x,
                           channels,
                           multi_scale_output=True,
                           name=None):
    """One HRNet module: four basic blocks per branch, then branch fusion.

    Args:
        x (list): one input per branch.
        channels (list): filter count per branch.
        multi_scale_output (bool): forwarded to ``fuse_layers``.
        name (str): name prefix shared by ``branches`` and ``fuse_layers``.

    Returns:
        The list returned by ``fuse_layers``.
    """
    per_branch = self.branches(x, 4, channels, name=name)
    return self.fuse_layers(
        per_branch,
        channels,
        multi_scale_output=multi_scale_output,
        name=name)
def stage(self,
          x,
          num_modules,
          channels,
          multi_scale_output=True,
          name=None):
    """Chain ``num_modules`` high-resolution modules.

    Modules are named ``<name>_1`` .. ``<name>_<num_modules>``. Only the
    last module is asked for a single-scale output, and only when
    ``multi_scale_output`` compares equal to ``False``.

    Returns:
        Output of the last module, or ``x`` itself when ``num_modules`` is 0.
    """
    feats = x
    last_idx = num_modules - 1
    for idx in range(num_modules):
        extra = {}
        # Deliberately an equality test: a value such as None must not be
        # treated as a request for single-scale output.
        if idx == last_idx and multi_scale_output == False:
            extra['multi_scale_output'] = False
        feats = self.high_resolution_module(
            feats, channels, name=name + '_' + str(idx + 1), **extra)
    return feats
def last_cls_out(self, x, name=None):
    """Apply a downsample-enabled bottleneck block to every branch.

    Branch ``i`` uses 32, 64, 128 or 256 filters (by position) and the
    block name ``<name>conv_<i+1>``.

    Returns:
        list with one bottleneck output per entry of ``x``.
    """
    head_filters = [32, 64, 128, 256]
    return [
        self.bottleneck_block(
            input=feat,
            num_filters=head_filters[idx],
            name=name + 'conv_' + str(idx + 1),
            downsample=True) for idx, feat in enumerate(x)
    ]
def basic_block(self,
                input,
                num_filters,
                stride=1,
                downsample=False,
                name=None):
    """Residual block made of two 3x3 conv-bn layers.

    Main path: conv-bn with ``stride`` (``<name>_conv1``), then conv-bn
    without activation (``<name>_conv2``), then optional
    squeeze-and-excitation (``<name>_fc``) when ``self.has_se`` is set.
    Shortcut: ``input`` itself, or a 1x1 conv-bn without activation
    (``<name>_downsample``) when ``downsample`` is truthy.

    Returns:
        ReLU of the element-wise sum of shortcut and main path.
    """
    main = self.conv_bn_layer(
        input=input,
        filter_size=3,
        num_filters=num_filters,
        stride=stride,
        name=name + '_conv1')
    main = self.conv_bn_layer(
        input=main,
        filter_size=3,
        num_filters=num_filters,
        if_act=False,
        name=name + '_conv2')

    shortcut = input
    if downsample:
        shortcut = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_filters,
            if_act=False,
            name=name + '_downsample')

    if self.has_se:
        main = self.squeeze_excitation(
            input=main,
            num_channels=num_filters,
            reduction_ratio=16,
            name=name + '_fc')

    return fluid.layers.elementwise_add(x=shortcut, y=main, act='relu')
def bottleneck_block(self,
                     input,
                     num_filters,
                     stride=1,
                     downsample=False,
                     name=None):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1 conv-bn layers).

    Main path: ``<name>_conv1`` (1x1), ``<name>_conv2`` (3x3 with
    ``stride``), ``<name>_conv3`` (1x1, ``4 * num_filters`` filters, no
    activation), then optional squeeze-and-excitation (``<name>_fc``) when
    ``self.has_se`` is set.
    Shortcut: ``input`` itself, or a 1x1 conv-bn to ``4 * num_filters``
    filters without activation (``<name>_downsample``) when ``downsample``
    is truthy.

    Returns:
        ReLU of the element-wise sum of shortcut and main path.
    """
    expanded = num_filters * 4

    main = input
    conv_specs = (
        ('_conv1', 1, num_filters, {}),
        ('_conv2', 3, num_filters, {'stride': stride}),
        ('_conv3', 1, expanded, {'if_act': False}),
    )
    for suffix, ksize, filters, extra in conv_specs:
        main = self.conv_bn_layer(
            input=main,
            filter_size=ksize,
            num_filters=filters,
            name=name + suffix,
            **extra)

    shortcut = input
    if downsample:
        shortcut = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=expanded,
            if_act=False,
            name=name + '_downsample')

    if self.has_se:
        main = self.squeeze_excitation(
            input=main,
            num_channels=expanded,
            reduction_ratio=16,
            name=name + '_fc')

    return fluid.layers.elementwise_add(x=shortcut, y=main, act='relu')
def squeeze_excitation(self,
                       input,
                       num_channels,
                       reduction_ratio,
                       name=None):
    """Apply a squeeze-and-excitation gate to `input`.

    The input is globally average-pooled, passed through a reducing fully
    connected layer (relu) and an expanding one (sigmoid), and the result is
    multiplied back onto `input`.

    Args:
        input: Feature map to re-weight.
        num_channels: Output size of the excitation layer.
        reduction_ratio: Divisor applied to `num_channels` to obtain the
            size of the squeeze layer.
        name: Prefix for the parameter names. It is concatenated with string
            suffixes, so leaving it as None raises TypeError.

    Returns:
        The output of fluid.layers.elementwise_mul(input, excitation, axis=0).
    """
    pool = fluid.layers.pool2d(
        input=input, pool_size=0, pool_type='avg', global_pooling=True)
    # Floor division keeps the layer size an int; true division would hand a
    # float to fluid.layers.fc on Python 3.
    squeeze_size = num_channels // reduction_ratio
    stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
    squeeze = fluid.layers.fc(
        input=pool,
        size=squeeze_size,
        act='relu',
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=name + '_sqz_weights'),
        bias_attr=ParamAttr(name=name + '_sqz_offset'))
    # The uniform init bound is recomputed from the squeeze layer's width.
    stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
    excitation = fluid.layers.fc(
        input=squeeze,
        size=num_channels,
        act='sigmoid',
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv),
            name=name + '_exc_weights'),
        bias_attr=ParamAttr(name=name + '_exc_offset'))
    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
    return scale
def conv_bn_layer(self,
                  input,
                  filter_size,
                  num_filters,
                  stride=1,
                  padding=1,
                  num_groups=1,
                  if_act=True,
                  name=None):
    """Stack a bias-free conv2d, batch norm and an optional relu.

    Args:
        input: Feature map to convolve.
        filter_size: Kernel size; also determines the padding, which is
            always (filter_size - 1) // 2.
        num_filters: Number of output channels.
        stride: Convolution stride.
        padding: Accepted for signature compatibility but not used; the
            padding is derived from `filter_size` instead.
        num_groups: Passed to conv2d as `groups`.
        if_act: When true, relu is applied after batch norm.
        name: Prefix for the '<name>_weights' parameter and the '<name>_bn'
            batch-norm names. Leaving it as None raises TypeError.

    Returns:
        The batch-normalised (and optionally relu-activated) feature map.
    """
    derived_padding = (filter_size - 1) // 2
    weight_attr = ParamAttr(initializer=MSRA(), name=name + '_weights')
    conv_out = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=derived_padding,
        groups=num_groups,
        act=None,
        param_attr=weight_attr,
        bias_attr=False)
    normed = self._bn(input=conv_out, bn_name=name + '_bn')
    if not if_act:
        return normed
    return fluid.layers.relu(normed)
def _bn(self, input, act=None, bn_name=None):
    """Add a batch-norm layer whose parameters are named after `bn_name`.

    Args:
        input: Feature map to normalise.
        act: Activation forwarded to fluid.layers.batch_norm.
        bn_name: Prefix for the '_scale', '_offset', '_mean' and '_variance'
            names. Leaving it as None raises TypeError.

    Returns:
        The output of fluid.layers.batch_norm.
    """
    frozen = bool(self.freeze_norm)
    # A frozen norm gets a zero learning rate on its scale and offset.
    lr_scale = 0. if frozen else 1.
    decay = self.norm_decay

    if not (self.num_classes or self.feature_maps == "stage4"):
        # Default initializers, with L2 decay on both parameters.
        regularizer = L2Decay(decay)
        scale_init = None
        offset_init = None
    else:
        # Classification / "stage4" mode: constant init, no regularizer.
        regularizer = None
        scale_init = fluid.initializer.Constant(1.0)
        offset_init = fluid.initializer.Constant(0.0)

    scale_attr = ParamAttr(
        name=bn_name + '_scale',
        learning_rate=lr_scale,
        regularizer=regularizer,
        initializer=scale_init)
    offset_attr = ParamAttr(
        name=bn_name + '_offset',
        learning_rate=lr_scale,
        regularizer=regularizer,
        initializer=offset_init)

    normed = fluid.layers.batch_norm(
        input=input,
        act=act,
        name=bn_name + '.output.1',
        param_attr=scale_attr,
        bias_attr=offset_attr,
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance',
        use_global_stats=frozen)

    # Both variables are looked up unconditionally, as before; only a frozen
    # norm has their gradients stopped.
    scale_var = fluid.framework._get_var(scale_attr.name)
    offset_var = fluid.framework._get_var(offset_attr.name)
    if frozen:
        for param in (scale_var, offset_var):
            param.stop_gradient = True
    return normed
def __call__(self, input):
    """Build the network on `input` and return the requested outputs.

    Args:
        input: Must be a `Variable`.

    Returns:
        Whatever self.net returns when `self.num_classes` is truthy or
        `self.feature_maps` equals "stage4". Otherwise an OrderedDict that
        maps 'res<level>_sum' to self.end_points[level - 2] for each level
        in `self.feature_maps`, in that order.
    """
    assert isinstance(input, Variable)
    selected = self.feature_maps
    if isinstance(selected, (list, tuple)):
        assert set(selected) <= set([2, 3, 4, 5]), \
            "feature maps {} not in [2, 3, 4, 5]".format(selected)

    out = self.net(input)
    if self.num_classes or selected == "stage4":
        return out

    picked = []
    for level in selected:
        feat = self.end_points[level - 2]
        picked.append(feat)
        # Levels at or below freeze_at do not propagate gradients.
        if self.freeze_at >= level:
            feat.stop_gradient = True
    return OrderedDict(('res{}_sum'.format(selected[pos]), feat)
                       for pos, feat in enumerate(picked))
...@@ -79,10 +79,14 @@ class MobileNetV1(object): ...@@ -79,10 +79,14 @@ class MobileNetV1(object):
bn_name = name + "_bn" bn_name = name + "_bn"
norm_decay = self.norm_decay norm_decay = self.norm_decay
if self.num_classes:
regularizer = None
else:
regularizer = L2Decay(norm_decay)
bn_param_attr = ParamAttr( bn_param_attr = ParamAttr(
regularizer=L2Decay(norm_decay), name=bn_name + '_scale') regularizer=regularizer, name=bn_name + '_scale')
bn_bias_attr = ParamAttr( bn_bias_attr = ParamAttr(
regularizer=L2Decay(norm_decay), name=bn_name + '_offset') regularizer=regularizer, name=bn_name + '_offset')
return fluid.layers.batch_norm( return fluid.layers.batch_norm(
input=conv, input=conv,
act=act, act=act,
...@@ -189,12 +193,12 @@ class MobileNetV1(object): ...@@ -189,12 +193,12 @@ class MobileNetV1(object):
if self.num_classes: if self.num_classes:
out = fluid.layers.pool2d( out = fluid.layers.pool2d(
input=out, pool_type='avg', global_pooling=True) input=out, pool_type='avg', global_pooling=True)
output = fluid.layers.fc( output = fluid.layers.fc(input=out,
input=out, size=self.num_classes,
size=self.num_classes, param_attr=ParamAttr(
param_attr=ParamAttr( initializer=fluid.initializer.MSRA(),
initializer=fluid.initializer.MSRA(), name="fc7_weights"), name="fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset")) bias_attr=ParamAttr(name="fc7_offset"))
return output return output
if not self.with_extra_blocks: if not self.with_extra_blocks:
...@@ -213,4 +217,4 @@ class MobileNetV1(object): ...@@ -213,4 +217,4 @@ class MobileNetV1(object):
module17 = self._extra_block(module16, num_filters[3][0], module17 = self._extra_block(module16, num_filters[3][0],
num_filters[3][1], 1, 2, num_filters[3][1], 1, 2,
self.prefix_name + "conv7_4") self.prefix_name + "conv7_4")
return module11, module13, module14, module15, module16, module17 return module11, module13, module14, module15, module16, module17
\ No newline at end of file
此差异已折叠。
此差异已折叠。
...@@ -14,5 +14,7 @@ ...@@ -14,5 +14,7 @@
from .unet import UNet from .unet import UNet
from .deeplabv3p import DeepLabv3p from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .model_utils import libs from .model_utils import libs
from .model_utils import loss from .model_utils import loss
此差异已折叠。
此差异已折叠。
...@@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax ...@@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss from .model_utils.loss import bce_loss
import paddlex.utils.logging as logging
class UNet(object): class UNet(object):
...@@ -106,7 +105,8 @@ class UNet(object): ...@@ -106,7 +105,8 @@ class UNet(object):
name='weights', name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0), regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.33))
with scope("conv0"): with scope("conv0"):
data = bn_relu( data = bn_relu(
conv( conv(
...@@ -140,8 +140,7 @@ class UNet(object): ...@@ -140,8 +140,7 @@ class UNet(object):
name='weights', name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0), regularization_coeff=0.0),
initializer=fluid.initializer.XavierInitializer(), initializer=fluid.initializer.XavierInitializer(), )
)
with scope("up"): with scope("up"):
if self.upsample_mode == 'bilinear': if self.upsample_mode == 'bilinear':
short_cut_shape = fluid.layers.shape(short_cut) short_cut_shape = fluid.layers.shape(short_cut)
...@@ -197,7 +196,8 @@ class UNet(object): ...@@ -197,7 +196,8 @@ class UNet(object):
name='weights', name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0), regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.01))
with scope("logit"): with scope("logit"):
data = conv( data = conv(
data, data,
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册