diff --git a/deploy/slim/README.md b/deploy/slim/README.md index 696446c10eaa00eb885037e570751e2cc53a9d58..6ac8bf04838236ce954b26d9a62e131c92588cdb 100644 --- a/deploy/slim/README.md +++ b/deploy/slim/README.md @@ -61,10 +61,10 @@ cd PaddleClas 训练指令如下: -* CPU/单机单卡启动 +* CPU ```bash -python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_quantalization.yaml -o Global.device cpu +python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_quantalization.yaml -o Global.device=cpu ``` 其中`yaml`文件解析详见[参考文档](../../docs/zh_CN/tutorials/config_description.md)。为了保证精度,`yaml`文件中已经使用`pretrained model`. @@ -102,10 +102,10 @@ python3.7 deploy/slim/quant_post_static.py -c ppcls/configs/ImageNet/ResNet/ResN 训练指令如下: -- CPU/单机单卡启动 +- CPU ```bash -python3.7 deploy/slim/slim.py -m export -c ppcls/configs/slim/ResNet50_vd_prune.yaml -o Global.device cpu +python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_prune.yaml -o Global.device=cpu ``` - 单机单卡/单机多卡/多机多卡启动 diff --git a/deploy/slim/README_en.md b/deploy/slim/README_en.md index bb82858de230605e1a6cddecb86eda35aaad7e21..85c2f4ce6ee077ed3bea16f7f047f9fdd6bc5f49 100644 --- a/deploy/slim/README_en.md +++ b/deploy/slim/README_en.md @@ -62,10 +62,10 @@ After the quantization strategy is defined, the model can be quantified. The training command is as follow: -* CPU/Single GPU training +* CPU ```bash -python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_quantalization.yaml -o Global.device cpu +python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_quantalization.yaml -o Global.device=cpu ``` The description of `yaml` file can be found in this [doc](../../docs/en/tutorials/config_en.md). To get better accuracy, the `pretrained model`is used in `yaml`. @@ -101,10 +101,10 @@ If run successfully, the directory `quant_post_static_model` is generated in `Gl #### 3.2 Model Pruning -- CPU/Single GPU training +- CPU ```bash -python3.7 deploy/slim/slim.py -m export -c ppcls/configs/slim/ResNet50_vd_prune.yaml -o Global.device cpu +python3.7 deploy/slim/slim.py -m train -c ppcls/configs/slim/ResNet50_vd_prune.yaml -o Global.device=cpu ``` - Distributed training diff --git a/deploy/slim/slim.py b/deploy/slim/slim.py index 9584fb4fb5563c147ea55fcdc267dae81a3a1a04..a7bb317c553ed77028685fc47b783b7f261676d8 100644 --- a/deploy/slim/slim.py +++ b/deploy/slim/slim.py @@ -23,6 +23,8 @@ import paddleslim from paddle.jit import to_static from paddleslim.analysis import dygraph_flops as flops import argparse +import paddle.distributed as dist +from visualdl import LogWriter __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.abspath(os.path.join(__dir__, '../../'))) @@ -30,8 +32,12 @@ from paddleslim.dygraph.quant import QAT from ppcls.engine.trainer import Trainer from ppcls.utils import config, logger -from ppcls.utils.save_load import load_dygraph_pretrain +from ppcls.utils.logger import init_logger +from ppcls.utils.config import print_config +from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url from ppcls.data import build_dataloader +from ppcls.arch import apply_to_static +from ppcls.arch import build_model quant_config = { # weight preprocess type, default is None and no preprocessing is performed. @@ -59,7 +65,75 @@ quant_config = { class Trainer_slim(Trainer): def __init__(self, config, mode="train"): - super().__init__(config, mode) + + self.mode = mode + self.config = config + self.output_dir = self.config['Global']['output_dir'] + + log_file = os.path.join(self.output_dir, self.config["Arch"]["name"], + f"{mode}.log") + init_logger(name='root', log_file=log_file) + print_config(config) + # set device + assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu"] + self.device = paddle.set_device(self.config["Global"]["device"]) + # set dist + self.config["Global"][ + "distributed"] = paddle.distributed.get_world_size() != 1 + + if "Head" in self.config["Arch"]: + self.is_rec = True + else: + self.is_rec = False + + self.model = build_model(self.config["Arch"]) + # set @to_static for benchmark, skip this by default. + apply_to_static(self.config, self.model) + + if self.config["Global"]["pretrained_model"] is not None: + if self.config["Global"]["pretrained_model"].startswith("http"): + load_dygraph_pretrain_from_url( + self.model, self.config["Global"]["pretrained_model"]) + else: + load_dygraph_pretrain( + self.model, self.config["Global"]["pretrained_model"]) + + self.vdl_writer = None + if self.config['Global']['use_visualdl'] and mode == "train": + vdl_writer_path = os.path.join(self.output_dir, "vdl") + if not os.path.exists(vdl_writer_path): + os.makedirs(vdl_writer_path) + self.vdl_writer = LogWriter(logdir=vdl_writer_path) + logger.info('train with paddle {} and device {}'.format( + paddle.__version__, self.device)) + # init members + self.train_dataloader = None + self.eval_dataloader = None + self.gallery_dataloader = None + self.query_dataloader = None + self.eval_mode = self.config["Global"].get("eval_mode", + "classification") + self.amp = True if "AMP" in self.config else False + if self.amp and self.config["AMP"] is not None: + self.scale_loss = self.config["AMP"].get("scale_loss", 1.0) + self.use_dynamic_loss_scaling = self.config["AMP"].get( + "use_dynamic_loss_scaling", False) + else: + self.scale_loss = 1.0 + self.use_dynamic_loss_scaling = False + if self.amp: + AMP_RELATED_FLAGS_SETTING = { + 'FLAGS_cudnn_batchnorm_spatial_persistent': 1, + 'FLAGS_max_inplace_grad_add': 8, + } + paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING) + self.train_loss_func = None + self.eval_loss_func = None + self.train_metric_func = None + self.eval_metric_func = None + self.use_dali = self.config['Global'].get("use_dali", False) + + # for slim pact = self.config["Slim"].get("quant", False) self.pact = pact.get("name", False) if pact else pact @@ -99,6 +173,11 @@ class Trainer_slim(Trainer): if self.quanter is None and self.pruner is None: logger.info("Training without slim") + # for distributed training + if self.config["Global"]["distributed"]: + dist.init_parallel_env() + self.model = paddle.DataParallel(self.model) + def export_inference_model(self): if os.path.exists( os.path.join(self.output_dir, self.config["Arch"]["name"],