gen_label.py 2.8 KB
Newer Older
T
tink2123 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
T
tink2123 已提交
14 15
import os
import argparse
littletomatodonkey's avatar
littletomatodonkey 已提交
16
import json
T
tink2123 已提交
17 18 19


def gen_rec_label(input_path, out_label):
    """Convert a comma-separated recognition label file to tab-separated form.

    Every non-blank line of ``input_path`` is expected to look like
    ``image_path, "text"``. All spaces and double quotes are removed and the
    record is written to ``out_label`` as ``image_path<TAB>text``.

    Args:
        input_path: Path of the label file to read.
        out_label: Path of the label file to write (overwritten if present).

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    # 'utf-8-sig' decodes UTF-8 and drops a leading BOM if there is one, so
    # the first image path is not polluted by an invisible '\ufeff'.
    with open(input_path, 'r', encoding='utf-8-sig') as f, \
            open(out_label, 'w', encoding='utf-8') as out_file:
        for line in f:
            record = line.strip('\n').replace(" ", "")
            if not record:
                # Blank or trailing empty lines carry no label.
                continue
            # Split on the first comma only: the label itself may contain
            # commas and must not be truncated.
            tmp = record.split(',', 1)
            img_path, label = tmp[0], tmp[1]
            label = label.replace("\"", "")
            out_file.write(img_path + '\t' + label + '\n')
T
tink2123 已提交
27 28


T
tink2123 已提交
29 30 31 32 33 34 35
def gen_det_label(root_path, input_dir, out_label):
    """Merge per-image detection annotation files into one label file.

    Every file in ``input_dir`` is read as one annotation file whose non-blank
    lines have the form ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``. For each
    file one line is written to ``out_label``::

        <image path>\t<JSON list of {"transcription": ..., "points": ...}>

    The image path is ``root_path`` concatenated (no separator is inserted)
    with the annotation file name minus its first 3 and last 4 characters,
    plus ``".jpg"``; e.g. ``gt_img_1.txt`` becomes ``img_1.jpg``.

    Args:
        root_path: Prefix prepended verbatim to every image file name.
        input_dir: Directory containing the annotation files.
        out_label: Path of the label file to write (overwritten if present).

    Raises:
        IndexError: If a non-blank line has fewer than nine fields.
        ValueError: If a coordinate field is not an integer.
    """
    with open(out_label, 'w', encoding='utf-8') as out_file:
        # Sorted so the output order is reproducible; os.listdir() returns
        # entries in arbitrary order.
        for label_file in sorted(os.listdir(input_dir)):
            img_path = root_path + label_file[3:-4] + ".jpg"
            label = []
            # 'utf-8-sig' drops a leading BOM if present; otherwise the BOM
            # would be glued to the first coordinate and break int().
            with open(
                    os.path.join(input_dir, label_file), 'r',
                    encoding='utf-8-sig') as f:
                for line in f:
                    record = line.strip("\n\r")
                    if not record:
                        # Blank or trailing empty lines carry no annotation.
                        continue
                    # At most 8 splits: everything after the eighth comma is
                    # the transcription, which may itself contain commas.
                    tmp = record.split(',', 8)
                    points = tmp[:8]
                    # Pair the flat coordinate list up into [x, y] points.
                    s = [[int(t) for t in points[i:i + 2]]
                         for i in range(0, len(points), 2)]
                    label.append({"transcription": tmp[8], "points": s})

            out_file.write(img_path + '\t' + json.dumps(
                label, ensure_ascii=False) + '\n')
T
tink2123 已提交
49 50 51 52 53 54 55 56 57


if __name__ == "__main__":
    # Command-line entry point: convert a recognition label file (mode=rec)
    # or a directory of detection annotation files (mode=det).
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default="rec",
        # Restrict to the supported modes so a typo is reported by argparse
        # instead of the script silently doing nothing.
        choices=["rec", "det"],
        help='Generate rec_label or det_label, can be set rec or det')
    parser.add_argument(
        '--root_path',
        type=str,
        default=".",
        help='The root directory of images.Only takes effect when mode=det ')
    parser.add_argument(
        '--input_path',
        type=str,
        default=".",
        help='Input_label or input path to be converted')
    parser.add_argument(
        '--output_label',
        type=str,
        default="out_label.txt",
        help='Output file name')

    args = parser.parse_args()
    if args.mode == "rec":
        print("Generate rec label")
        gen_rec_label(args.input_path, args.output_label)
    elif args.mode == "det":
        # Progress message added to mirror the rec branch.
        print("Generate det label")
        gen_det_label(args.root_path, args.input_path, args.output_label)