From 79eac5d44551dbec2bc1eeebe05e2f135dbea84c Mon Sep 17 00:00:00 2001 From: root Date: Mon, 10 Feb 2020 10:38:40 +0000 Subject: [PATCH] add features: train with cpu, save and load checkpoint --- dygraph/mobilenet/README.md | 52 +++++++++++++++++++++ dygraph/mobilenet/run_cpu_v1.sh | 1 + dygraph/mobilenet/run_cpu_v2.sh | 1 + dygraph/mobilenet/run_mul_v1_checkpoint.sh | 2 + dygraph/mobilenet/run_mul_v2_checkpoint.sh | 2 + dygraph/mobilenet/run_sing_v1_checkpoint.sh | 2 + dygraph/mobilenet/run_sing_v2_checkpoint.sh | 2 + 7 files changed, 62 insertions(+) create mode 100644 dygraph/mobilenet/README.md create mode 100644 dygraph/mobilenet/run_cpu_v1.sh create mode 100644 dygraph/mobilenet/run_cpu_v2.sh create mode 100644 dygraph/mobilenet/run_mul_v1_checkpoint.sh create mode 100644 dygraph/mobilenet/run_mul_v2_checkpoint.sh create mode 100644 dygraph/mobilenet/run_sing_v1_checkpoint.sh create mode 100644 dygraph/mobilenet/run_sing_v2_checkpoint.sh diff --git a/dygraph/mobilenet/README.md b/dygraph/mobilenet/README.md new file mode 100644 index 00000000..5d3a3e64 --- /dev/null +++ b/dygraph/mobilenet/README.md @@ -0,0 +1,52 @@ +**模型简介** + +图像分类是计算机视觉的重要领域,它的目标是将图像分类到预定义的标签。CNN模型在图像分类领域取得了突破性的成果,同时模型复杂度也在不断增加。MobileNet是一种小巧而高效的CNN模型,本文介绍如何使用PaddlePaddle的动态图MobileNet进行图像分类。 + +**代码结构** + + ├── run_mul_v1.sh # 多卡训练启动脚本_v1 + ├── run_mul_v2.sh # 多卡训练启动脚本_v2 + ├── run_sing_v1.sh # 单卡训练启动脚本_v1 + ├── run_sing_v2.sh # 单卡训练启动脚本_v2 + ├── run_cpu_v1.sh # CPU训练启动脚本_v1 + ├── run_cpu_v2.sh # CPU训练启动脚本_v2 + ├── train.py # 训练入口 + ├── mobilenet_v1.py # 网络结构v1 + ├── mobilenet_v2.py # 网络结构v2 + ├── reader.py # 数据reader + ├── utils # 基础工具目录 + +**数据准备** + +请参考:https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification + +**模型训练** + +若使用4卡训练,启动方式如下: + + bash run_mul_v1.sh + bash run_mul_v2.sh +若使用单卡训练,启动方式如下: + + bash run_sing_v1.sh + bash run_sing_v2.sh + +若使用CPU训练,启动方式如下: + + bash run_cpu_v1.sh + bash run_cpu_v2.sh + + +**模型性能** + + Model Top-1(单卡/4卡) Top-5(单卡/4卡) 
收敛时间(单卡/4卡) + + MobileNetV1 0.707/0.711 0.897/0.899 116小时/30.9小时 + + MobileNetV2 0.708/0.724 0.899/0.906 227.8小时/60.8小时 + +**参考论文** + +MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications, Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam + +MobileNetV2: Inverted Residuals and Linear Bottlenecks, Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen diff --git a/dygraph/mobilenet/run_cpu_v1.sh b/dygraph/mobilenet/run_cpu_v1.sh new file mode 100644 index 00000000..81de4df3 --- /dev/null +++ b/dygraph/mobilenet/run_cpu_v1.sh @@ -0,0 +1 @@ +python3 train.py --use_gpu=False --batch_size=64 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=./data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1 diff --git a/dygraph/mobilenet/run_cpu_v2.sh b/dygraph/mobilenet/run_cpu_v2.sh new file mode 100644 index 00000000..4c18c006 --- /dev/null +++ b/dygraph/mobilenet/run_cpu_v2.sh @@ -0,0 +1 @@ +python3 train.py --use_gpu=False --batch_size=64 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=cosine_decay --lr=0.1 --num_epochs=240 --data_dir=/ssd9/chaj//data/ILSVRC2012 --l2_decay=4e-5 --model=MobileNetV2 diff --git a/dygraph/mobilenet/run_mul_v1_checkpoint.sh b/dygraph/mobilenet/run_mul_v1_checkpoint.sh new file mode 100644 index 00000000..6b511f19 --- /dev/null +++ b/dygraph/mobilenet/run_mul_v1_checkpoint.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python3 -m paddle.distributed.launch --log_dir ./mylog.v1.checkpoint train.py --use_data_parallel 1 --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --lr_strategy=piecewise_decay --lr=0.1 --data_dir=./data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1 --model_save_dir=output.v1.mul.checkpoint/ --num_epochs=120 
--checkpoint=./output.v1.mul/_mobilenet_v1_epoch50 diff --git a/dygraph/mobilenet/run_mul_v2_checkpoint.sh b/dygraph/mobilenet/run_mul_v2_checkpoint.sh new file mode 100644 index 00000000..2b1b5587 --- /dev/null +++ b/dygraph/mobilenet/run_mul_v2_checkpoint.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python3 -m paddle.distributed.launch --log_dir ./mylog.v2.checkpoint train.py --use_data_parallel 1 --batch_size=500 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output.v2.mul.checkpoint/ --lr_strategy=cosine_decay --lr=0.1 --num_epochs=240 --data_dir=./data/ILSVRC2012 --l2_decay=4e-5 --model=MobileNetV2 --checkpoint=./output.v2.mul/_mobilenet_v2_epoch50 diff --git a/dygraph/mobilenet/run_sing_v1_checkpoint.sh b/dygraph/mobilenet/run_sing_v1_checkpoint.sh new file mode 100644 index 00000000..47d68d96 --- /dev/null +++ b/dygraph/mobilenet/run_sing_v1_checkpoint.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0 +python3 train.py --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output.v1.sing/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=./data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1 --checkpoint=./output.v1.sing/_mobilenet_v1_epoch50 diff --git a/dygraph/mobilenet/run_sing_v2_checkpoint.sh b/dygraph/mobilenet/run_sing_v2_checkpoint.sh new file mode 100644 index 00000000..ed77b221 --- /dev/null +++ b/dygraph/mobilenet/run_sing_v2_checkpoint.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0 +python3 train.py --batch_size=500 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output.v2.sing/ --lr_strategy=cosine_decay --lr=0.1 --num_epochs=240 --data_dir=./data/ILSVRC2012 --l2_decay=4e-5 --model=MobileNetV2 --checkpoint=./output.v2.sing/_mobilenet_v2_epoch50 -- GitLab