进行fine tune时报错
Created by: tea321000
为使您的问题得到快速解决,在建立Issue前,请您先通过如下方式搜索是否有相似问题:【搜索issue关键字】【使用labels筛选】【官方文档】
建立issue时,为快速解决问题,请您根据使用情况给出如下信息:
-
标题:简洁、精准描述您的问题,例如“ssd 模型前置lstm报错 ”
-
版本、环境信息: 1)PaddlePaddle版本:1.7.1 2)CPU:8核CPU 3)GPU:v100 4)系统环境:aistudio高级版
-
模型信息 1)模型名称:xception71_imagenet预训练模型 2)使用数据集名称:ADNI
-
复现信息:ADNI读入三维nii文件之后切片生成并保存256x256的二维npy文件,重写ImageClassificationReader的preprocess方法,令其读入256x256的npy文件,将其resize成224x224之后再复制三份变成3x224x224再返回以符合原来demo里返回的大小以及RGB三通道。
-
问题描述:在调用run_states = task.finetune_and_eval()之后,读取了一个batch的数据之后报错: Exception in thread Thread-5: Traceback (most recent call last): File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 926, in _bootstrap_inner self.run() File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/threading.py", line 870, in run self._target(*self._args, **self._kwargs) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/reader.py", line 806, in thread_main six.reraise(sys.exc_info()) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/six.py", line 693, in reraise raise value File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/reader.py", line 790, in thread_main self._check_input_array(item) File "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/reader.py", line 777, in _check_input_array "\n\tFaild to convert input data to a regular ndarray :\n\t Usually " TypeError: Faild to convert input data to a regular ndarray :
- Usually this means the input data contains nested lists with different lengths.
- Check the reader function passed to 'decorate_batch_generator' to locate the data causes this issue.
- Please consider using 'fluid.create_lod_tensor' to convert it to a LoD-Tensor.
查看check_input_array()中的变量,得到报错的数据格式如下,可以看到在array后面不知道为何多出了一个像是标签label的'1'符号导致无法通过check,但查看train_list.txt并没有发现文本格式的错误,不知道原因: (array([[[ 0.296308 , 0.25897488, 0.22164178, ..., 6.0301347 , 4.4905605 , 3.0255744 ], [ 2.0780523 , 1.1955202 , 0.79825604, ..., 34.152626 , 34.070496 , 33.98842 ], [33.90652 , 33.820316 , 33.72079 , ..., 37.73026 , 37.750618 , 37.769886 ], ..., [50.575066 , 50.578205 , 50.580597 , ..., 49.258797 , 49.291706 , 49.33335 ], [49.377808 , 49.42314 , 49.46876 , ..., 44.466885 , 44.732124 , 45.01983 ], [45.31055 , 45.581875 , 45.850407 , ..., 47.230377 , 40.347664 , 33.46495 ]],
[[ 0.296308 , 0.25897488, 0.22164178, ..., 6.0301347 ,
4.4905605 , 3.0255744 ],
[ 2.0780523 , 1.1955202 , 0.79825604, ..., 34.152626 ,
34.070496 , 33.98842 ],
[33.90652 , 33.820316 , 33.72079 , ..., 37.73026 ,
37.750618 , 37.769886 ],
...,
[50.575066 , 50.578205 , 50.580597 , ..., 49.258797 ,
49.291706 , 49.33335 ],
[49.377808 , 49.42314 , 49.46876 , ..., 44.466885 ,
44.732124 , 45.01983 ],
[45.31055 , 45.581875 , 45.850407 , ..., 47.230377 ,
40.347664 , 33.46495 ]],
[[ 0.296308 , 0.25897488, 0.22164178, ..., 6.0301347 ,
4.4905605 , 3.0255744 ],
[ 2.0780523 , 1.1955202 , 0.79825604, ..., 34.152626 ,
34.070496 , 33.98842 ],
[33.90652 , 33.820316 , 33.72079 , ..., 37.73026 ,
37.750618 , 37.769886 ],
...,
[50.575066 , 50.578205 , 50.580597 , ..., 49.258797 ,
49.291706 , 49.33335 ],
[49.377808 , 49.42314 , 49.46876 , ..., 44.466885 ,
44.732124 , 45.01983 ],
[45.31055 , 45.581875 , 45.850407 , ..., 47.230377 ,
40.347664 , 33.46495 ]]], dtype=float32), '1')
# Rewritten section (only the preprocess logic differs from the stock
# PaddleHub ImageClassificationReader).
class ImageClassificationReader(BaseReader):
    """Reader that serves pre-sliced 2-D ``.npy`` brain slices as 3-channel
    CHW image tensors for fine-tuning.

    Only ``preprocess`` differs from the stock PaddleHub
    ImageClassificationReader: instead of decoding an image file with PIL it
    loads a ``.npy`` array, resizes it to (width, height) and replicates the
    single slice across 3 channels to match the RGB input the pretrained
    model expects.
    """

    # NOTE: the pasted snippet rendered "__init__" as "init" (markdown ate
    # the double underscores); the dunder names are restored here.
    def __init__(self,
                 image_width,
                 image_height,
                 dataset=None,
                 channel_order="RGB",
                 images_mean=None,
                 images_std=None,
                 data_augmentation=False,
                 random_seed=None):
        """Store image geometry / normalization settings and validate them.

        Args:
            image_width (int): target width, must be positive.
            image_height (int): target height, must be positive.
            dataset: dataset object, optionally carrying ``images_mean`` /
                ``images_std`` attributes.
            channel_order (str): must be a key of ``channel_order_dict``.
            images_mean / images_std: per-channel statistics; default to
                identity normalization (mean 0, std 1) when neither the
                caller nor the dataset provides them.
            data_augmentation (bool): kept for interface compatibility; the
                overridden ``preprocess`` below does not use it.
            random_seed: forwarded to ``BaseReader``.

        Raises:
            ValueError: on an unknown channel order or non-positive size.
        """
        super(ImageClassificationReader, self).__init__(dataset, random_seed)
        self.image_width = image_width
        self.image_height = image_height
        self.channel_order = channel_order
        self.data_augmentation = data_augmentation
        self.images_std = images_std
        self.images_mean = images_mean

        if self.images_mean is None:
            # Only the absence of the attribute is an expected failure;
            # a bare `except:` would also hide real bugs inside the dataset.
            try:
                self.images_mean = self.dataset.images_mean
            except AttributeError:
                self.images_mean = [0, 0, 0]
        self.images_mean = np.array(self.images_mean).reshape(3, 1, 1)

        if self.images_std is None:
            try:
                self.images_std = self.dataset.images_std
            except AttributeError:
                self.images_std = [1, 1, 1]
        self.images_std = np.array(self.images_std).reshape(3, 1, 1)

        if self.channel_order not in channel_order_dict:
            raise ValueError(
                "The channel_order should in %s." % channel_order_dict.keys())
        if self.image_width <= 0 or self.image_height <= 0:
            raise ValueError("Image width and height should not be negative.")

    def data_generator(self,
                       batch_size=1,
                       phase="train",
                       shuffle=False,
                       data=None,
                       return_list=True):
        """Build a batched sample generator for the given phase.

        Args:
            batch_size (int): samples per batch.
            phase (str): "train", "val"/"dev", "test" or "predict".
            shuffle (bool): requested shuffle; forced True for train and
                False for every other phase.
            data: only used for phase "predict" — an iterable of paths.
            return_list (bool): kept for interface compatibility (unused).

        Returns:
            A ``paddle.batch`` wrapped generator yielding lists of
            ``(image,)`` tuples for predict, ``(image, label)`` otherwise.

        Raises:
            ValueError: when no dataset is set for a non-predict phase.
        """
        if phase != 'predict' and not self.dataset:
            raise ValueError("The dataset is none and it's not allowed!")

        if phase == "train":
            shuffle = True
            if hasattr(self.dataset, "train_data"):
                # Compatible with ImageClassificationDataset which has done
                # the shuffling itself.
                self.dataset.train_data()
                shuffle = False
            data = self.get_train_examples()
            self.num_examples['train'] = len(data)
        elif phase == "val" or phase == "dev":
            shuffle = False
            if hasattr(self.dataset, "validate_data"):
                # Compatible with ImageClassificationDataset
                self.dataset.validate_data()
                shuffle = False
            data = self.get_dev_examples()
            self.num_examples['dev'] = len(data)
        elif phase == "test":
            shuffle = False
            if hasattr(self.dataset, "test_data"):
                # Compatible with ImageClassificationDataset.  The original
                # bound the return value only to overwrite it on the next
                # line; call without binding, matching the train branch.
                self.dataset.test_data()
                shuffle = False
            data = self.get_test_examples()
            self.num_examples['test'] = len(data)
        elif phase == "predict":
            # `data` is supplied by the caller for predict; nothing to do.
            shuffle = False

        def preprocess(image_path):
            # Load the saved slice; the triple indexing suggests the array
            # is stored with three leading singleton axes — TODO confirm
            # against the .npy writer.
            image = np.load(image_path)[0][0][0]
            # NOTE(review): np.resize does NOT interpolate — it repeats or
            # truncates the flattened data.  For a true spatial resize use
            # PIL/Image.resize or cv2.resize.  Kept as-is to preserve the
            # reported behavior pending confirmation.
            image = np.resize(image, (self.image_width, self.image_height))
            # Replicate the single slice into 3 channels: (3, H, W).
            image = np.stack((image,) * 3, axis=0).astype('float32')
            return image

        def _data_reader():
            if shuffle:
                np.random.shuffle(data)
            if phase == "predict":
                for image_path in data:
                    yield (preprocess(image_path), )
            else:
                for image_path, label in data:
                    # Labels parsed from train_list.txt arrive as strings
                    # (e.g. '1'); yielding (ndarray, str) is exactly the
                    # mixed sample that _check_input_array rejects with
                    # "Faild to convert input data to a regular ndarray".
                    # Cast to int so the feed tuple is (float32 array, int).
                    yield (preprocess(image_path), int(label))

        return paddle.batch(_data_reader, batch_size=batch_size)