diff --git a/configs/kunlun/ResNet50.yaml b/configs/kunlun/ResNet50.yaml new file mode 100644 index 0000000000000000000000000000000000000000..496d7df19c550ef90f886d7253b60ba2add61fab --- /dev/null +++ b/configs/kunlun/ResNet50.yaml @@ -0,0 +1,76 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.0078125 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 20 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 20 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/docs/zh_CN/extension/train_on_multiplatform_xpu.md b/docs/zh_CN/extension/train_on_multiplatform_xpu.md new file mode 100644 index 0000000000000000000000000000000000000000..d8b8918bf9464b565abd70ef2a297a11df0349d6 --- /dev/null +++ b/docs/zh_CN/extension/train_on_multiplatform_xpu.md @@ -0,0 +1,15 @@ +# 图像分类昆仑模型介绍(持续更新中) + +## 前言 + +* 文档介绍了目前昆仑支持的模型以及如何在昆仑设备上训练这些模型。支持昆仑的PaddlePaddle安装参考install_kunlun(https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/paddle/install/install_Kunlun_zh.md) + +## 昆仑训练 +* 数据来源参考[ImageNet1k](https://github.com/PaddlePaddle/PaddleClas/blob/dygraph/docs/en/tutorials/data_en.md)。昆仑训练效果与CPU/GPU对齐。 + +### ResNet50 +* 命令: + +```python3.7 tools/train_multi_platform.py -c configs/kunlun/ResNet50.yaml -o use_gpu=False -o use_xpu=True``` + +与cpu/gpu训练的区别是加上-o use_xpu=True, 表示执行在昆仑设备上。 diff --git a/tools/train_multi_platform.py b/tools/train_multi_platform.py index 6362d8beae1e6d58997e4dce85c6287cac9bc164..b5e1bbce48b1fbd341007eea1a61ab605e2aadfe 100644 --- a/tools/train_multi_platform.py +++ b/tools/train_multi_platform.py @@ -63,7 +63,17 @@ def main(args): config = get_config(args.config, overrides=args.override, show=True) # assign the place use_gpu = config.get("use_gpu", True) - places = fluid.cuda_places() if use_gpu else fluid.cpu_places() + use_xpu = config.get("use_xpu", False) + assert ( + use_gpu and use_xpu + ) is not True, "gpu and xpu can not be true in the same time in static mode!" 
+ + if use_gpu: + places = fluid.cuda_places() + elif use_xpu: + places = fluid.xpu_places() + else: + places = fluid.cpu_places() # startup_prog is used to do some parameter init work, # and train prog is used to hold the network