# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Generate synthetic text images plus detection/recognition label files.

This code is adapted from:
https://github.com/zcswdt/Color_OCR_image_generator
"""
import argparse
import json
import os
import random

import PIL
from PIL import Image, ImageDraw, ImageFont

# Colour used to draw every character.
TEXT_COLOR = (0, 0, 0)

# Sub-directories of the output dir. The trailing slash is kept on purpose:
# these strings are also the prefix written into the label files.
DET_SAVE_DIR = 'imgs/'
DET_VAL_SAVE_DIR = 'imgs_val/'
REC_SAVE_DIR = 'rec_imgs/'
REC_VAL_SAVE_DIR = 'rec_imgs_val/'


def get_char_lines(txt_root_path):
    """
    desc: get corpus lines

    Reads every file found directly in ``txt_root_path`` as UTF-8 text and
    returns all of their lines, stripped of surrounding whitespace, in one
    list. Blank lines are returned as empty strings.
    """
    char_lines = []
    for txt in os.listdir(txt_root_path):
        txt_path = os.path.join(txt_root_path, txt)
        # Context manager guarantees the file is closed even if decoding fails.
        with open(txt_path, mode='r', encoding='utf-8') as f:
            char_lines.extend(line.strip() for line in f)
    return char_lines


def _open_rgb(image_file):
    """Open ``image_file`` and return an in-memory RGB copy (file is closed)."""
    with Image.open(image_file) as src:
        return src.convert('RGB')


def _pick_font(fonts_list, cf):
    """Pick a random font file and a random size in [font_min_size, font_max_size]."""
    font_path = random.choice(fonts_list)
    font_size = random.randint(cf.font_min_size, cf.font_max_size)
    return ImageFont.truetype(font_path, font_size)


def _measure_chars(font, chars):
    """
    Return ``(widths, heights)`` lists with the rendered size of each character.

    Raises:
        ValueError: if ``chars`` is empty (nothing to render).
    """
    if not chars:
        raise ValueError('chars must contain at least one character')
    # Older Pillow versions are measured with getsize(), newer ones with
    # getbbox(); the version test is hoisted out of the per-character loop.
    use_getsize = int(PIL.__version__.split('.')[0]) < 10
    widths = []
    heights = []
    for ch in chars:
        if use_getsize:
            wt, ht = font.getsize(ch)
        else:
            left, top, right, bottom = font.getbbox(ch)
            wt, ht = right - left, bottom - top
        widths.append(wt)
        heights.append(ht)
    return widths, heights


def _random_origin(img_size, text_size):
    """
    Pick a random top-left corner so a box of ``text_size`` fits in ``img_size``.

    Raises:
        ValueError: if the text box is larger than the image.
    """
    img_w, img_h = img_size
    f_w, f_h = text_size
    if f_w > img_w or f_h > img_h:
        raise ValueError(
            'text box {}x{} does not fit into background image {}x{}'.format(
                f_w, f_h, img_w, img_h))
    x1 = random.randint(0, img_w - f_w)
    y1 = random.randint(0, img_h - f_h)
    return x1, y1


def get_horizontal_text_picture(image_file, chars, fonts_list, cf):
    """
    desc: gen horizontal text picture

    Draws ``chars`` left to right at a random position on the background
    ``image_file`` using a random font from ``fonts_list`` and a random size
    taken from ``cf.font_min_size``..``cf.font_max_size``.

    Returns:
        (crop_img, chars): the region of the background covering the text,
        and the text that was drawn.

    Raises:
        ValueError: if ``chars`` is empty or the text does not fit the image.
    """
    img = _open_rgb(image_file)
    font = _pick_font(fonts_list, cf)

    ch_w, ch_h = _measure_chars(font, chars)
    # Characters are separated by a gap as wide as the widest character.
    char_space_width = max(ch_w)
    f_w = sum(ch_w) + char_space_width * (len(chars) - 1)
    f_h = max(ch_h)

    x1, y1 = _random_origin(img.size, (f_w, f_h))

    draw = ImageDraw.Draw(img)
    pen_x = x1
    for ch, wt in zip(chars, ch_w):
        draw.text((pen_x, y1), ch, TEXT_COLOR, font=font)
        pen_x += wt + char_space_width

    crop_img = img.crop((x1, y1, x1 + f_w, y1 + f_h))
    return crop_img, chars


def get_vertical_text_picture(image_file, chars, fonts_list, cf):
    """
    desc: gen vertical text picture

    Draws ``chars`` top to bottom at a random position on the background
    ``image_file``, crops the text region and rotates the crop by 90 degrees.

    Returns:
        (crop_img, chars): the rotated crop and the text that was drawn.

    Raises:
        ValueError: if ``chars`` is empty or the text does not fit the image.
    """
    img = _open_rgb(image_file)
    font = _pick_font(fonts_list, cf)

    ch_w, ch_h = _measure_chars(font, chars)
    f_w = max(ch_w)
    f_h = sum(ch_h)

    x1, y1 = _random_origin(img.size, (f_w, f_h))

    draw = ImageDraw.Draw(img)
    pen_y = y1
    for ch, ht in zip(chars, ch_h):
        draw.text((x1, pen_y), ch, TEXT_COLOR, font=font)
        pen_y += ht

    crop_img = img.crop((x1, y1, x1 + f_w, y1 + f_h))
    crop_img = crop_img.transpose(Image.ROTATE_90)
    return crop_img, chars


def get_fonts(fonts_path):
    """
    desc: get all fonts

    Returns the path of every entry found directly in ``fonts_path``.
    """
    return [
        os.path.join(fonts_path, font_file)
        for font_file in os.listdir(fonts_path)
    ]


def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_img', type=int, default=30,
                        help="Number of images to generate")
    parser.add_argument('--font_min_size', type=int, default=11)
    parser.add_argument('--font_max_size', type=int, default=12,
                        help="Help adjust the size of the generated text and the size of the picture")
    parser.add_argument('--bg_path', type=str, default='./background',
                        help='The generated text pictures will be pasted onto the pictures of this folder')
    parser.add_argument('--det_bg_path', type=str, default='./det_background',
                        help='The generated text pictures will use the pictures of this folder as the background')
    parser.add_argument('--fonts_path', type=str, default='../../StyleText/fonts',
                        help='The font used to generate the picture')
    parser.add_argument('--corpus_path', type=str, default='./corpus',
                        help='The corpus used to generate the text picture')
    parser.add_argument('--output_dir', type=str, default='./output/',
                        help='Images save dir')
    return parser.parse_args()


def main():
    """
    Generate ``cf.num_img`` recognition crops and detection images.

    Samples with index below 20% of ``num_img`` go to the validation split,
    the rest to the training split. Label files and image folders are
    created under ``cf.output_dir``.

    Raises:
        ValueError: if the corpus, fonts or background folders yield nothing.
    """
    cf = _parse_args()
    output_dir = cf.output_dir

    # makedirs: works for nested output paths and when the dir already exists.
    os.makedirs(output_dir, exist_ok=True)
    for sub_dir in (DET_SAVE_DIR, DET_VAL_SAVE_DIR,
                    REC_SAVE_DIR, REC_VAL_SAVE_DIR):
        os.makedirs(os.path.join(output_dir, sub_dir), exist_ok=True)

    # get corpus; blank lines cannot be rendered, so they are dropped here
    char_lines = [line for line in get_char_lines(txt_root_path=cf.corpus_path)
                  if line]
    # get all fonts
    fonts_list = get_fonts(cf.fonts_path)
    # rec bg
    img_root_path = cf.bg_path
    imnames = os.listdir(img_root_path)
    # det bg
    det_bg_path = cf.det_bg_path
    bg_pics = os.listdir(det_bg_path)

    for what, items, where in (('corpus lines', char_lines, cf.corpus_path),
                               ('fonts', fonts_list, cf.fonts_path),
                               ('background images', imnames, img_root_path),
                               ('det background images', bg_pics, det_bg_path)):
        if not items:
            raise ValueError('no {} found in {}'.format(what, where))

    # Number of leading samples that go to the validation split (20%).
    val_count = cf.num_img * 0.2

    def label_path(name):
        # os.path.join so --output_dir works with or without a trailing slash.
        return os.path.join(output_dir, name)

    print('start generating...')
    with open(label_path('det_gt_val.txt'), 'w', encoding='utf-8') as det_val_file, \
            open(label_path('det_gt_train.txt'), 'w', encoding='utf-8') as det_train_file, \
            open(label_path('rec_gt_val.txt'), 'w', encoding='utf-8') as rec_val_file, \
            open(label_path('rec_gt_train.txt'), 'w', encoding='utf-8') as rec_train_file:
        for i in range(cf.num_img):
            # Cycle through the corpus when more images than lines are
            # requested instead of failing with IndexError.
            text = char_lines[i % len(char_lines)]
            is_val = i < val_count

            imname = random.choice(imnames)
            img_path = os.path.join(img_root_path, imname)

            rnd = random.random()
            if rnd < 0.5:
                # gen horizontal text picture
                gen_img, _ = get_horizontal_text_picture(
                    img_path, text, fonts_list, cf)
                ori_w, ori_h = gen_img.size
                # Trim 3 px from the top edge.
                # NOTE(review): presumably removes font padding - confirm.
                gen_img = gen_img.crop((0, 3, ori_w, ori_h))
            else:
                # gen vertical text picture
                gen_img, _ = get_vertical_text_picture(
                    img_path, text, fonts_list, cf)
                ori_w, ori_h = gen_img.size
                # Trim 3 px from the left edge of the rotated crop.
                gen_img = gen_img.crop((3, 0, ori_w, ori_h))
            ori_w, ori_h = gen_img.size

            # rec imgs
            save_img_name = str(i).zfill(4) + '.jpg'
            if is_val:
                save_dir = os.path.join(REC_VAL_SAVE_DIR, save_img_name)
                rec_val_file.write(save_dir + '\t' + text + '\n')
            else:
                save_dir = os.path.join(REC_SAVE_DIR, save_img_name)
                rec_train_file.write(save_dir + '\t' + text + '\n')
            gen_img.save(os.path.join(output_dir, save_dir),
                         quality=95, subsampling=0)

            # det img: random choice bg
            bg_pic = random.choice(bg_pics)
            # Convert to RGB like the rec backgrounds so RGBA/palette PNGs
            # can still be written as JPEG.
            det_img = _open_rgb(os.path.join(det_bg_path, bg_pic))
            # the PCB position is fixed, modify it according to your own scenario
            if bg_pic == '1.png':
                x1 = 38
                y1 = 3
            else:
                x1 = 34
                y1 = 1
            det_img.paste(gen_img, (x1, y1))

            # text pos: corners of the pasted crop, clockwise from top-left
            chars_pos = [[x1, y1], [x1 + ori_w, y1],
                         [x1 + ori_w, y1 + ori_h], [x1, y1 + ori_h]]
            label = [{"transcription": text, "points": chars_pos}]
            label_json = json.dumps(label, ensure_ascii=False)
            if is_val:
                save_dir = os.path.join(DET_VAL_SAVE_DIR, save_img_name)
                det_val_file.write(save_dir + '\t' + label_json + '\n')
            else:
                save_dir = os.path.join(DET_SAVE_DIR, save_img_name)
                det_train_file.write(save_dir + '\t' + label_json + '\n')
            det_img.save(os.path.join(output_dir, save_dir),
                         quality=95, subsampling=0)


if __name__ == '__main__':
    main()