提交 f9170fcf 编写于 作者: T tink2123

polish gen_label

上级 4d816b61
...@@ -14,6 +14,15 @@ wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/train_icdar2015_l ...@@ -14,6 +14,15 @@ wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/train_icdar2015_l
wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/test_icdar2015_label.txt wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/test_icdar2015_label.txt
``` ```
PaddleOCR 也提供了数据格式转换脚本,可以将官网 label 转换为 PaddleOCR 支持的数据格式。数据转换工具在 `train_data/gen_label.py`, 这里以训练集为例:
```
# 将官网下载的标签文件转换为 train_icdar2015_label.txt
python gen_label.py --mode="det" --root_path="icdar_c4_train_imgs/" \
--input_path="ch4_training_localization_transcription_gt" \
--output_label="train_icdar2015_label.txt"
```
解压数据集和下载标注文件后,PaddleOCR/train_data/ 有两个文件夹和两个文件,分别是: 解压数据集和下载标注文件后,PaddleOCR/train_data/ 有两个文件夹和两个文件,分别是:
``` ```
/PaddleOCR/train_data/icdar2015/text_localization/ /PaddleOCR/train_data/icdar2015/text_localization/
......
...@@ -44,6 +44,13 @@ wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_t ...@@ -44,6 +44,13 @@ wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_t
wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
``` ```
PaddleOCR 也提供了数据格式转换脚本,可以将官网 label 转换为 PaddleOCR 支持的数据格式。数据转换工具在 `train_data/gen_label.py`, 这里以训练集为例:
```
# 将官网下载的标签文件转换为 rec_gt_label.txt
python gen_label.py --mode="rec" --input_path="{path/of/origin/label}" --output_label="rec_gt_label.txt"
```
最终训练集应有如下文件结构: 最终训练集应有如下文件结构:
``` ```
|-train_data |-train_data
......
...@@ -3,23 +3,17 @@ import argparse ...@@ -3,23 +3,17 @@ import argparse
def gen_rec_label(input_path, out_label): def gen_rec_label(input_path, out_label):
out_file = open(out_label, 'w') with open(out_label, 'w') as out_file:
with open(input_path, 'r') as f: with open(input_path, 'r') as f:
for line in f.readlines(): for line in f.readlines():
tmp = line.strip('\n').replace(" ", "").split(',') tmp = line.strip('\n').replace(" ", "").split(',')
img_path, label = tmp[0], tmp[1] img_path, label = tmp[0], tmp[1]
label = label.replace("\"", "") label = label.replace("\"", "")
out_file.write(img_path + '\t' + label + '\n') out_file.write(img_path + '\t' + label + '\n')
out_file.close()
def gen_det_label(input_dir, out_label): def gen_det_label(root_path, input_dir, out_label):
root_path = "" with open(out_label, 'w') as out_file:
if "training" in input_dir:
root_path = "icdar_c4_train_imgs/"
elif "test" in input_dir:
root_path = "ch4_test_images/"
out_file = open(out_label, 'w')
for label_file in os.listdir(input_dir): for label_file in os.listdir(input_dir):
img_path = root_path + label_file[3:-4] + ".jpg" img_path = root_path + label_file[3:-4] + ".jpg"
label = [] label = []
...@@ -34,7 +28,6 @@ def gen_det_label(input_dir, out_label): ...@@ -34,7 +28,6 @@ def gen_det_label(input_dir, out_label):
result = {"transcription": tmp[-1], "points": s} result = {"transcription": tmp[-1], "points": s}
label.append(result) label.append(result)
out_file.write(img_path + '\t' + str(label) + '\n') out_file.write(img_path + '\t' + str(label) + '\n')
out_file.close()
if __name__ == "__main__": if __name__ == "__main__":
...@@ -44,6 +37,11 @@ if __name__ == "__main__": ...@@ -44,6 +37,11 @@ if __name__ == "__main__":
type=str, type=str,
default="rec", default="rec",
help='Generate rec_label or det_label, can be set rec or det') help='Generate rec_label or det_label, can be set rec or det')
parser.add_argument(
'--root_path',
type=str,
default=".",
help='The root directory of images.Only takes effect when mode=det ')
parser.add_argument( parser.add_argument(
'--input_path', '--input_path',
type=str, type=str,
...@@ -60,4 +58,4 @@ if __name__ == "__main__": ...@@ -60,4 +58,4 @@ if __name__ == "__main__":
print("Generate rec label") print("Generate rec label")
gen_rec_label(args.input_path, args.output_label) gen_rec_label(args.input_path, args.output_label)
elif args.mode == "det": elif args.mode == "det":
gen_det_label(args.input_path, args.output_label) gen_det_label(args.root_path, args.input_path, args.output_label)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册