diff --git a/configs/rec/rec_r31_robustscanner.yml b/configs/rec/rec_r31_robustscanner.yml index 40d39aee3c42c18085ace035944dba057b923245..6dd35394e80adc7ebba35adf0c6e0f471c53a75d 100644 --- a/configs/rec/rec_r31_robustscanner.yml +++ b/configs/rec/rec_r31_robustscanner.yml @@ -66,7 +66,8 @@ Metric: Train: dataset: name: LMDBDataSet - data_dir: ./train_data/data_lmdb_release/training/ + # data_dir: ./train_data/data_lmdb_release/training/ + data_dir: I:/dataset/OCR/STR/evaluation/evaluation/CUTE80 transforms: - DecodeImage: # load image img_mode: BGR @@ -88,7 +89,8 @@ Train: Eval: dataset: name: LMDBDataSet - data_dir: ./train_data/data_lmdb_release/evaluation/ + # data_dir: ./train_data/data_lmdb_release/evaluation/ + data_dir: I:/dataset/OCR/STR/evaluation/evaluation/CUTE80 transforms: - DecodeImage: # load image img_mode: BGR diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index a5620f84f8cb00584215f2b839055e9a46c25c5a..8b2309f442e85e0ab03b9cd3d33e21090a7f4211 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -259,24 +259,6 @@ class PRENResizeImg(object): data['image'] = resized_img.astype(np.float32) return data -<<<<<<< HEAD -class RobustScannerRecResizeImg(object): - def __init__(self, image_shape, max_text_length, width_downsample_ratio=0.25, **kwargs): - self.image_shape = image_shape - self.width_downsample_ratio = width_downsample_ratio - self.max_text_length = max_text_length - - def __call__(self, data): - img = data['image'] - norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar( - img, self.image_shape, self.width_downsample_ratio) - word_positons = np.array(range(0, self.max_text_length)).astype('int64') - data['image'] = norm_img - data['resized_shape'] = resize_shape - data['pad_shape'] = pad_shape - data['valid_ratio'] = valid_ratio - data['word_positons'] = word_positons -======= class SPINRecResizeImg(object): def __init__(self, image_shape, @@ -319,7 +301,6 @@ class SPINRecResizeImg(object): img -= mean img *= stdinv data['image'] = img ->>>>>>> 1696b36bdb4152138ed5cb08a357df8fe03dc067 return data class GrayRecResizeImg(object): @@ -399,6 +380,23 @@ class SVTRRecResizeImg(object): data['valid_ratio'] = valid_ratio return data +class RobustScannerRecResizeImg(object): + def __init__(self, image_shape, max_text_length, width_downsample_ratio=0.25, **kwargs): + self.image_shape = image_shape + self.width_downsample_ratio = width_downsample_ratio + self.max_text_length = max_text_length + + def __call__(self, data): + img = data['image'] + norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar( + img, self.image_shape, self.width_downsample_ratio) + word_positons = np.array(range(0, self.max_text_length)).astype('int64') + data['image'] = norm_img + data['resized_shape'] = resize_shape + data['pad_shape'] = pad_shape + data['valid_ratio'] = valid_ratio + data['word_positons'] = word_positons + return data def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): imgC, imgH, imgW_min, imgW_max = image_shape