AssertionError: invalid id[383702] for memory managers
Created by: yplovecc
"""
File: deeplabv3p.py

Train a PaddleX DeepLabv3p semantic-segmentation model on a local dataset.
"""
import os

# Pin all work to GPU card 3 and make Paddle release idle GPU memory eagerly
# (threshold 0.0 GB) to reduce peak memory pressure.
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"

import paddlex as pdx
from paddlex.seg import transforms

# Transforms used at train and eval time.
# API reference:
# https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
# The pre-composed alternative would be:
#   train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[1024, 1024])
#   eval_transforms = transforms.ComposedSegTransforms(mode='eval')
crop_size=768 """ train_transforms = transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip(), transforms.RandomRotate(), transforms.ResizeStepScaling(), transforms.RandomPaddingCrop(crop_size=crop_size), transforms.Normalize() ]) """ train_transforms = transforms.Compose([ transforms.RandomPaddingCrop(crop_size=crop_size, im_padding_value=128, label_padding_value=255), transforms.Normalize() ]) eval_transforms = transforms.Compose([ transforms.Normalize() ])
# Datasets for training and evaluation.
# API reference:
# https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
    data_dir='../dataset-all/',
    file_list='../dataset-all/train_dataset/train_list.txt',
    label_list='../dataset-all/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
    data_dir='../dataset-all/',
    file_list='../dataset-all/test_dataset/test_list.txt',
    label_list='../dataset-all/labels.txt',
    transforms=eval_transforms)
# Initialize the model and train.
# Training metrics can be inspected with VisualDL:
#   visualdl --logdir models/deeplabv3p_Xception65/vdl_log --port 8001
# then open http://<host>:8001 in a browser; <host> is 0.0.0.0/localhost on
# the training machine, or the machine's IP when accessing remotely.
# API reference:
# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
print("num_classes: %d" % num_classes)

# NOTE(review): in PaddleX, use_dice_loss/use_bce_loss are only supported for
# binary segmentation (num_classes == 2) -- confirm the label list here has
# exactly two classes, otherwise model construction will assert.
model = pdx.seg.DeepLabv3p(
    num_classes=num_classes,
    backbone='Xception65',
    use_dice_loss=True,
    use_bce_loss=True,
    ignore_index=255)  # 255 marks padded/unlabeled pixels (see transforms)
model.train(
    num_epochs=1000,
    train_dataset=train_dataset,
    train_batch_size=2,
    eval_dataset=eval_dataset,
    save_interval_epochs=100,
    learning_rate=0.005,
    save_dir='models/deeplabv3p_Xception65',
    use_vdl=True)
eval_metrics = model.evaluate(eval_dataset, batch_size=num_classes) print("eval_metrics:", eval_metrics)