diff --git a/dygraph/ocr_recognition/README.md b/dygraph/ocr_recognition/README.md new file mode 100644 index 0000000000000000000000000000000000000000..69c89fdc2899b8221861f9855eeff433c35c0f00 --- /dev/null +++ b/dygraph/ocr_recognition/README.md @@ -0,0 +1,33 @@ +DyGraph模式下ocr recognition实现 +======== + +简介 +-------- +ocr任务是识别图片单行的字母信息,在动态图下使用了带attention的seq2seq结构,静态图实现可以参考([ocr recognition](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/ocr_recognition))。 +运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。 + + +## 代码结构 +``` +├── train.py # 训练脚本。 +├── data_reader.py # 数据读取。 +└── utility # 基础的函数。 +``` + +## 使用的数据 + +教程中使用`ocr attention`数据集作为训练数据,该数据集通过`paddle.dataset`模块自动下载到本地。 + +## 训练测试ocr recognition + +在GPU单卡上训练ocr recognition: + +``` +env CUDA_VISIBLE_DEVICES=0 python train.py +``` + +这里`CUDA_VISIBLE_DEVICES=0`表示程序在0号GPU卡上执行,请根据自身情况修改这个参数。 + +## 效果 + +在test测试集合上,最好的效果为82.0%。 diff --git a/dygraph/ocr_recognition/data_reader.py b/dygraph/ocr_recognition/data_reader.py index 00e98d12a57b33dd51a4de515a53738ff53cb2f2..41618ae18c8747bd83ba05dacdf88fdd97d01d8b 100644 --- a/dygraph/ocr_recognition/data_reader.py +++ b/dygraph/ocr_recognition/data_reader.py @@ -17,7 +17,6 @@ DATA_SHAPE = [1, 48, 512] DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5" DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz" -CACHE_DIR_NAME = "ctc_data" SAVED_FILE_NAME = "data.tar.gz" DATA_DIR_NAME = "data" TRAIN_DATA_DIR_NAME = "train_images" @@ -27,15 +26,14 @@ TEST_LIST_FILE_NAME = "test.list" class DataGenerator(object): - def __init__(self, model="crnn_ctc"): - self.model = model + def __init__(self): + pass def train_reader(self, img_root_dir, img_label_list, batchsize, cycle, - max_length, shuffle=True): ''' Reader interface for training. 
@@ -89,11 +87,6 @@ class DataGenerator(object): label = [int(c) for c in items[-1].split(',')] max_len = max(max_len, len(label)) - #print( "max len", max_len, i) - max_length = max_len - - #mask = np.zeros( (batchsize, max_length)).astype('float32') - for j in range(batchsize): line = img_label_lines[i * batchsize + j] items = line.split(' ') @@ -102,11 +95,11 @@ class DataGenerator(object): mask = np.zeros((max_len)).astype('float32') mask[:len(label) + 1] = 1.0 #mask[ j, :len(label) + 1] = 1.0 - if max_length > len(label) + 1: - extend_label = [EOS] * (max_length - len(label) - 1) + if max_len > len(label) + 1: + extend_label = [EOS] * (max_len - len(label) - 1) label.extend(extend_label) else: - label = label[0:max_length - 1] + label = label[0:max_len - 1] img = Image.open(os.path.join(img_root_dir, items[ 2])).convert('L') if j == 0: @@ -121,85 +114,6 @@ class DataGenerator(object): return reader - def test_reader(self, img_root_dir, img_label_list): - ''' - Reader interface for inference. - - :param img_root_dir: The root path of the images for training. - :type img_root_dir: str - - :param img_label_list: The path of the file for testing. - :type img_label_list: str - ''' - - def reader(): - for line in open(img_label_list): - # h, w, img_name, labels - items = line.split(' ') - - label = [int(c) for c in items[-1].split(',')] - img = Image.open(os.path.join(img_root_dir, items[2])).convert( - 'L') - img = np.array(img) - 127.5 - img = img[np.newaxis, ...] - if self.model == "crnn_ctc": - yield img, label - else: - yield img, [SOS] + label, label + [EOS] - - return reader - - def infer_reader(self, img_root_dir=None, img_label_list=None, cycle=False): - '''A reader interface for inference. - - :param img_root_dir: The root path of the images for training. - :type img_root_dir: str - - :param img_label_list: The path of the file for - inference. It should be the path of file if img_root_dir - was None. 
If img_label_list was set to None, it will read image path - from stdin. - :type img_root_dir: str - - :param cycle: If number of iterations is greater than dataset_size / - batch_size it reiterates dataset over as many times as necessary. - :type cycle: bool - ''' - - def reader(): - def yield_img_and_label(lines): - for line in lines: - if img_root_dir is not None: - # h, w, img_name, labels - img_name = line.split(' ')[2] - img_path = os.path.join(img_root_dir, img_name) - else: - img_path = line.strip("\t\n\r") - img = Image.open(img_path).convert('L') - img = np.array(img) - 127.5 - img = img[np.newaxis, ...] - label = [int(c) for c in line.split(' ')[3].split(',')] - yield img, label - - if img_label_list is not None: - lines = [] - with open(img_label_list) as f: - lines = f.readlines() - for img, label in yield_img_and_label(lines): - yield img, label - while cycle: - for img, label in yield_img_and_label(lines): - yield img, label - else: - while True: - img_path = input("Please input the path of image: ") - img = Image.open(img_path).convert('L') - img = np.array(img) - 127.5 - img = img[np.newaxis, ...] - yield img, [[0]] - - return reader - def num_classes(): '''Get classes number of this dataset. 
@@ -213,51 +127,31 @@ def data_shape(): return DATA_SHAPE -def train(batch_size, - max_length, - train_images_dir=None, - train_list_file=None, - cycle=False, - shuffle=False, - model="crnn_ctc"): - generator = DataGenerator(model) - if train_images_dir is None: - data_dir = download_data() - train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) - if train_list_file is None: - train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) +def data_reader(batch_size, + images_dir=None, + list_file=None, + cycle=False, + shuffle=False, + data_type="train"): + generator = DataGenerator() + + if data_type == "train": + if images_dir is None: + data_dir = download_data() + images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME) + if list_file is None: + list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME) + elif data_type == "test": + if images_dir is None: + data_dir = download_data() + images_dir = path.join(data_dir, TEST_DATA_DIR_NAME) + if list_file is None: + list_file = path.join(data_dir, TEST_LIST_FILE_NAME) + else: + print("data type only support train | test") + raise Exception("data type only support train | test") return generator.train_reader( - train_images_dir, - train_list_file, - batch_size, - cycle, - max_length, - shuffle=shuffle) - - -def test(batch_size=1, - test_images_dir=None, - test_list_file=None, - model="crnn_ctc"): - generator = DataGenerator(model) - if test_images_dir is None: - data_dir = download_data() - test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME) - if test_list_file is None: - test_list_file = path.join(data_dir, TEST_LIST_FILE_NAME) - return paddle.batch( - generator.test_reader(test_images_dir, test_list_file), batch_size) - - -def inference(batch_size=1, - infer_images_dir=None, - infer_list_file=None, - cycle=False, - model="crnn_ctc"): - generator = DataGenerator(model) - return paddle.batch( - generator.infer_reader(infer_images_dir, infer_list_file, cycle), - batch_size) + images_dir, list_file, batch_size, 
cycle, shuffle=shuffle) def download_data(): diff --git a/dygraph/ocr_recognition/train.py b/dygraph/ocr_recognition/train.py index 954612af19279048ba74da4471245f62b61b4654..f333d3e293293f29b9e639c86415fa1fd824c786 100644 --- a/dygraph/ocr_recognition/train.py +++ b/dygraph/ocr_recognition/train.py @@ -42,7 +42,6 @@ add_arg('train_images', str, None, "The directory of images to be u add_arg('train_list', str, None, "The list file of images to be used for training.") add_arg('test_images', str, None, "The directory of images to be used for test.") add_arg('test_list', str, None, "The list file of images to be used for training.") -add_arg('model', str, "attention", "Which type of network to be used. 'crnn_ctc' or 'attention'") add_arg('init_model', str, None, "The init model file of directory.") add_arg('use_gpu', bool, True, "Whether use GPU to train.") add_arg('min_average_window',int, 10000, "Min average window.") @@ -78,10 +77,6 @@ class Config(object): # special label for start and end SOS = 0 EOS = 1 - # settings for ctc data, not use in unittest - DATA_DIR_NAME = "./dataset/ctc_data/data" - TRAIN_DATA_DIR_NAME = "train_images" - TRAIN_LIST_FILE_NAME = "train.list" # data shape for input image DATA_SHAPE = [1, 48, 512] @@ -478,24 +473,18 @@ def train(args): grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0 ) - train_reader = data_reader.train( + train_reader = data_reader.data_reader( Config.batch_size, - max_length=Config.max_length, - train_images_dir=args.train_images, - train_list_file=args.train_list, cycle=args.total_step > 0, shuffle=True, - model=args.model) + data_type='train') infer_image= './data/data/test_images/' infer_files = './data/data/test.list' - test_reader = data_reader.train( + test_reader = data_reader.data_reader( Config.batch_size, - 1000, - train_images_dir= infer_image, - train_list_file= infer_files, cycle=False, - model=args.model) + data_type="test") def eval(): ocr_attention.eval() total_loss = 0.0 @@ -578,10 +567,6 @@ 
def train(args): total_loss = 0.0 if total_step > 0 and total_step % 2000 == 0: - - model_value = ocr_attention.state_dict() - np.savez( "model/" + str(total_step), **model_value ) - ocr_attention.eval() eval() ocr_attention.train()