"""Convert ICDAR-style ground-truth files into tab-separated label files.

Two modes are supported:

* ``rec``: a single text file with one ``image_name, "text"`` pair per line
  is converted to ``image_name<TAB>text`` lines.
* ``det``: a directory of per-image ground-truth files, each line holding
  ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``, is converted to one
  ``image_path<TAB>json_annotations`` line per image.
"""
import argparse
import json
import os


def gen_rec_label(input_path, out_label):
    """Write a recognition label file from an ``image, "text"`` listing.

    Args:
        input_path: Path of the ground-truth text file. Each non-blank line
            is ``image_name, "text"``.
        out_label: Path of the label file to create (overwritten if present).

    Raises:
        ValueError: If a non-blank line contains no comma separator.

    All spaces and double quotes are removed from the text, exactly as the
    separator/quoting cleanup requires; this also removes spaces that are
    part of the text itself.
    """
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise be
    # glued to the first image name.
    with open(input_path, 'r', encoding='utf-8-sig') as in_file, \
            open(out_label, 'w', encoding='utf-8') as out_file:
        for line_no, line in enumerate(in_file, 1):
            line = line.rstrip('\r\n').replace(" ", "")
            if not line:
                # Tolerate blank (e.g. trailing) lines.
                continue
            # Split on the first comma only so that text containing commas
            # (e.g. "1,000") is kept whole.
            img_path, sep, label = line.partition(',')
            if not sep:
                raise ValueError(
                    "{}:{}: expected 'image,label' but found no comma".format(
                        input_path, line_no))
            label = label.replace("\"", "")
            out_file.write(img_path + '\t' + label + '\n')


def _parse_det_file(label_path):
    """Parse one detection ground-truth file into a list of annotations.

    Each non-blank line must hold eight integer coordinates followed by the
    transcription. Returns a list of dicts with keys ``"transcription"``
    (str) and ``"points"`` (four ``[x, y]`` integer pairs).
    """
    annotations = []
    with open(label_path, 'r', encoding='utf-8-sig') as f:
        for line_no, line in enumerate(f, 1):
            line = line.strip("\n\r")
            if not line:
                continue
            fields = line.split(',')
            if len(fields) < 9:
                raise ValueError(
                    "{}:{}: expected 8 coordinates and a transcription, "
                    "got {} field(s)".format(label_path, line_no, len(fields)))
            coords = [int(value) for value in fields[:8]]
            points = [coords[i:i + 2] for i in range(0, 8, 2)]
            # Everything after the eighth comma is the transcription, which
            # may itself contain commas.
            transcription = ','.join(fields[8:])
            annotations.append({
                "transcription": transcription,
                "points": points
            })
    return annotations


def gen_det_label(input_dir, out_label):
    """Write a detection label file from a directory of ground-truth files.

    Args:
        input_dir: Directory of per-image ground-truth files. File names are
            expected to look like ``gt_<image>.txt``: the first three and
            last four characters are stripped to obtain the image name.
        out_label: Path of the label file to create (overwritten if present).

    Raises:
        ValueError: If a ground-truth line has fewer than nine fields or a
            coordinate is not an integer.

    The image path prefix is chosen from ``input_dir``: a path containing
    "training" maps to ``icdar_c4_train_imgs/``, one containing "test" maps
    to ``ch4_test_images/``, anything else gets no prefix. Each output line
    is ``image_path<TAB>annotations`` where annotations is a JSON list.
    """
    root_path = ""
    if "training" in input_dir:
        root_path = "icdar_c4_train_imgs/"
    elif "test" in input_dir:
        root_path = "ch4_test_images/"

    # List the directory before creating the output and skip the output file
    # itself, so that writing the label file inside input_dir does not make
    # it be parsed as a ground-truth file. Sorting keeps the output stable.
    out_abs = os.path.abspath(out_label)
    label_files = [
        name for name in sorted(os.listdir(input_dir))
        if os.path.abspath(os.path.join(input_dir, name)) != out_abs
    ]

    with open(out_label, 'w', encoding='utf-8') as out_file:
        for label_file in label_files:
            img_path = root_path + label_file[3:-4] + ".jpg"
            label = _parse_det_file(os.path.join(input_dir, label_file))
            out_file.write(img_path + '\t' + json.dumps(
                label, ensure_ascii=False) + '\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default="rec",
        choices=["rec", "det"],
        help='Generate rec_label or det_label, can be set rec or det')
    parser.add_argument(
        '--input_path',
        type=str,
        default=".",
        help='Input_label or input path to be converted')
    parser.add_argument(
        '--output_label',
        type=str,
        default="out_label.txt",
        help='Output file name')

    args = parser.parse_args()
    if args.mode == "rec":
        print("Generate rec label")
        gen_rec_label(args.input_path, args.output_label)
    elif args.mode == "det":
        print("Generate det label")
        gen_det_label(args.input_path, args.output_label)