Commit a3465454 authored by S SunAhong1993

add dataset convert

Parent 4b1e6b26
......@@ -50,6 +50,36 @@ def arg_parser():
action="store_true",
default=False,
help="export onnx model for deployment")
parser.add_argument(
"--data_conversion",
"-dc",
action="store_true",
default=False,
help="convert the dataset to the standard format")
parser.add_argument(
"--source",
"-se",
type=_text_type,
default=None,
help="define dataset format before the conversion")
parser.add_argument(
"--to",
"-to",
type=_text_type,
default=None,
help="define dataset format after the conversion")
parser.add_argument(
"--pics",
"-p",
type=_text_type,
default=None,
help="define pictures directory path")
parser.add_argument(
"--annotations",
"-a",
type=_text_type,
default=None,
help="define annotations directory path")
parser.add_argument(
"--fixed_input_shape",
"-fs",
......@@ -105,6 +135,24 @@ def main():
"paddlex --export_inference --model_dir model_path --save_dir infer_model"
)
pdx.convertor.export_onnx_model(model, args.save_dir)
if args.data_conversion:
assert args.source is not None, "--source should be defined when converting a dataset"
assert args.to is not None, "--to should be defined to confirm the target dataset format"
assert args.pics is not None, "--pics should be defined to confirm the pictures path"
assert args.annotations is not None, "--annotations should be defined to confirm the annotations path"
assert args.save_dir is not None, "--save_dir should be defined to store the target dataset"
if args.source == 'labelme' and args.to == 'ImageNet':
logging.error(
"The labelme dataset can not convert to the ImageNet dataset.",
exit=False)
if args.source == 'jingling' and args.to == 'PascalVOC':
logging.error(
"The jingling dataset can not convert to the PascalVOC dataset.",
exit=False)
pdx.tools.convert.dataset_conversion(args.source, args.to,
args.pics, args.annotations, args.save_dir)
if __name__ == "__main__":
......
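A minimal sketch of how the new flags fit together, for context only; the directories below are hypothetical placeholders, not part of this commit:

# Hypothetical CLI invocation enabled by the new --data_conversion flags:
#   paddlex --data_conversion --source labelme --to PascalVOC \
#           --pics ./images --annotations ./annotations --save_dir ./converted_voc
# The same conversion can be reached programmatically through the helper
# that main() dispatches to:
import paddlex as pdx

pdx.tools.convert.dataset_conversion(
    source="labelme",             # annotation tool that produced the dataset
    to="PascalVOC",               # target dataset format
    pics="./images",              # hypothetical image directory
    anns="./annotations",         # hypothetical annotation directory
    save_dir="./converted_voc")   # output directory for the converted dataset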
......@@ -40,4 +40,5 @@ def get_encoding(path):
f = open(path, 'rb')
data = f.read()
file_encoding = chardet.detect(data).get('encoding')
f.close()
return file_encoding
\ No newline at end of file
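The added f.close() plugs the leaked file handle in get_encoding; an equivalent context-manager form, shown only as an alternative sketch, would be:

import chardet

def get_encoding(path):
    # Same behaviour as the patched helper above, but the file is closed
    # automatically even if chardet.detect raises.
    with open(path, 'rb') as f:
        data = f.read()
    return chardet.detect(data).get('encoding')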
......@@ -15,8 +15,10 @@
# limitations under the License.
from .x2imagenet import EasyData2ImageNet
from .x2imagenet import JingLing2ImageNet
from .x2coco import LabelMe2COCO
from .x2coco import EasyData2COCO
from .x2coco import JingLing2COCO
from .x2voc import LabelMe2VOC
from .x2voc import EasyData2VOC
from .x2seg import JingLing2Seg
......@@ -24,10 +26,34 @@ from .x2seg import LabelMe2Seg
from .x2seg import EasyData2Seg
easydata2imagenet = EasyData2ImageNet().convert
jingling2imagenet = JingLing2ImageNet().convert
labelme2coco = LabelMe2COCO().convert
easydata2coco = EasyData2COCO().convert
jingling2coco = JingLing2COCO().convert
labelme2voc = LabelMe2VOC().convert
easydata2voc = EasyData2VOC().convert
jingling2seg = JingLing2Seg().convert
labelme2seg = LabelMe2Seg().convert
easydata2seg = EasyData2Seg().convert
def dataset_conversion(source, to, pics, anns, save_dir):
if source == 'labelme' and to == 'PascalVOC':
labelme2voc(pics, anns, save_dir)
elif source == 'labelme' and to == 'MSCOCO':
labelme2coco(pics, anns, save_dir)
elif source == 'labelme' and to == 'SEG':
labelme2seg(pics, anns, save_dir)
elif source == 'jingling' and to == 'ImageNet':
jingling2imagenet(pics, anns, save_dir)
elif source == 'jingling' and to == 'MSCOCO':
jingling2coco(pics, anns, save_dir)
elif source == 'jingling' and to == 'SEG':
jingling2seg(pics, anns, save_dir)
elif source == 'easydata' and to == 'ImageNet':
easydata2imagenet(pics, anns, save_dir)
elif source == 'easydata' and to == 'PascalVOC':
easydata2voc(pics, anns, save_dir)
elif source == 'easydata' and to == 'MSCOCO':
easydata2coco(pics, anns, save_dir)
elif source == 'easydata' and to == 'SEG':
easydata2seg(pics, anns, save_dir)
\ No newline at end of file
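The dispatch above covers ten (source, to) combinations and, having no final else branch, silently does nothing for any other pair; the CLI already flags labelme-to-ImageNet and jingling-to-PascalVOC with logging.error. The shortcut functions can also be called directly; a minimal sketch with hypothetical paths, assuming the paddlex.tools.convert module path used by the CLI:

from paddlex.tools.convert import labelme2voc, jingling2coco

# Hypothetical directory layout; replace with the real dataset paths.
labelme2voc("./images", "./labelme_jsons", "./voc_dataset")
jingling2coco("./images", "./jingling_jsons", "./coco_dataset")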
......@@ -100,7 +100,7 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1
image["file_name"] = json_info["imagePath"].split("/")[-1]
image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image
def generate_polygon_anns_field(self, height, width,
......@@ -144,7 +144,7 @@ class LabelMe2COCO(X2COCO):
img_name_part = osp.splitext(img_file)[0]
json_file = osp.join(json_dir, img_name_part + ".json")
if not osp.exists(json_file):
os.remove(os.remove(osp.join(image_dir, img_file)))
os.remove(osp.join(image_dir, img_file))
continue
image_id = image_id + 1
with open(json_file, mode='r', \
......@@ -216,7 +216,7 @@ class EasyData2COCO(X2COCO):
img_name_part = osp.splitext(img_file)[0]
json_file = osp.join(json_dir, img_name_part + ".json")
if not osp.exists(json_file):
os.remove(os.remove(osp.join(image_dir, img_file)))
os.remove(osp.join(image_dir, img_file))
continue
image_id = image_id + 1
with open(json_file, mode='r', \
......@@ -255,3 +255,107 @@ class EasyData2COCO(X2COCO):
self.annotations_list.append(
self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id,
label_to_num))
class JingLing2COCO(X2COCO):
"""将使用EasyData标注的检测或分割数据集转换为COCO数据集。
"""
def __init__(self):
super(JingLing2COCO, self).__init__()
def generate_images_field(self, json_info, image_id):
image = {}
image["height"] = json_info["size"]["height"]
image["width"] = json_info["size"]["width"]
image["id"] = image_id + 1
image["file_name"] = osp.split(json_info["path"])[-1]
return image
def generate_polygon_anns_field(self, height, width,
points, label, image_id,
object_id, label_to_num):
annotation = {}
annotation["segmentation"] = [list(np.asarray(points).flatten())]
annotation["iscrowd"] = 0
annotation["image_id"] = image_id + 1
annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
annotation["category_id"] = label_to_num[label]
annotation["id"] = object_id + 1
return annotation
def get_bbox(self, height, width, points):
polygons = points
mask = np.zeros([height, width], dtype=np.uint8)
mask = PIL.Image.fromarray(mask)
xy = list(map(tuple, polygons))
PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
mask = np.array(mask, dtype=bool)
index = np.argwhere(mask == 1)
rows = index[:, 0]
cols = index[:, 1]
left_top_r = np.min(rows)
left_top_c = np.min(cols)
right_bottom_r = np.max(rows)
right_bottom_c = np.max(cols)
return [
left_top_c, left_top_r, right_bottom_c - left_top_c,
right_bottom_r - left_top_r
]
def parse_json(self, img_dir, json_dir):
image_id = -1
object_id = -1
labels_list = []
label_to_num = {}
for img_file in os.listdir(img_dir):
img_name_part = osp.splitext(img_file)[0]
json_file = osp.join(json_dir, img_name_part + ".json")
if not osp.exists(json_file):
os.remove(osp.join(img_dir, img_file))
continue
image_id = image_id + 1
with open(json_file, mode='r', \
encoding=get_encoding(json_file)) as j:
json_info = json.load(j)
img_info = self.generate_images_field(json_info, image_id)
self.images_list.append(img_info)
anns_type = "bndbox"
for i, obj in enumerate(json_info["outputs"]["object"]):
if i == 0:
if "polygon" in obj:
anns_type = "polygon"
else:
if anns_type not in obj:
continue
object_id = object_id + 1
label = obj["name"]
if label not in labels_list:
self.categories_list.append(\
self.generate_categories_field(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
if anns_type == "polygon":
points = []
for j in range(int(len(obj["polygon"]) / 2.0)):
points.append([obj["polygon"]["x" + str(j + 1)],
obj["polygon"]["y" + str(j + 1)]])
self.annotations_list.append(
self.generate_polygon_anns_field(json_info["size"]["height"],
json_info["size"]["width"],
points,
label,
image_id,
object_id,
label_to_num))
if anns_type == "bndbox":
points = []
points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymin"]])
points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymax"]])
points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymax"]])
points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymin"]])
self.annotations_list.append(
self.generate_rectangle_anns_field(points, label, image_id,
object_id, label_to_num))
\ No newline at end of file
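In the new JingLing2COCO class, get_bbox rasterizes the polygon with PIL and takes the row/column extrema of the mask, yielding a COCO-style [x, y, width, height] box. A tiny standalone worked example of that computation, using a hypothetical 20x20 square with its top-left corner at (10, 20):

import numpy as np
import PIL.Image
import PIL.ImageDraw

points = [[10, 20], [30, 20], [30, 40], [10, 40]]  # (x, y) polygon vertices
mask = PIL.Image.fromarray(np.zeros([100, 100], dtype=np.uint8))
PIL.ImageDraw.Draw(mask).polygon(xy=list(map(tuple, points)), outline=1, fill=1)
index = np.argwhere(np.array(mask, dtype=bool))
rows, cols = index[:, 0], index[:, 1]
bbox = [int(cols.min()), int(rows.min()),
        int(cols.max() - cols.min()), int(rows.max() - rows.min())]
print(bbox)  # -> [10, 20, 20, 20], i.e. COCO [x, y, width, height]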
......@@ -22,9 +22,8 @@ import shutil
import numpy as np
from .base import MyEncoder, is_pic, get_encoding
class EasyData2ImageNet(object):
"""将使用EasyData标注的分类数据集转换为COCO数据集。
"""
class X2ImageNet(object):
def __init__(self):
pass
......@@ -46,8 +45,8 @@ class EasyData2ImageNet(object):
continue
with open(json_file, mode="r", \
encoding=get_encoding(json_file)) as j:
json_info = json.load(j)
for output in json_info['labels']:
json_info = self.get_json_info(j)
for output in json_info:
cls_name = output['name']
new_image_dir = osp.join(dataset_save_dir, cls_name)
if not osp.exists(new_image_dir):
......@@ -55,4 +54,28 @@ class EasyData2ImageNet(object):
if is_pic(img_name):
shutil.copyfile(
osp.join(image_dir, img_name),
osp.join(new_image_dir, img_name))
\ No newline at end of file
osp.join(new_image_dir, img_name))
class EasyData2ImageNet(X2ImageNet):
"""将使用EasyData标注的分类数据集转换为ImageNet数据集。
"""
def __init__(self):
super(EasyData2ImageNet, self).__init__()
def get_json_info(self, json_file):
json_info = json.load(json_file)
json_info = json_info['labels']
return json_info
class JingLing2ImageNet(X2ImageNet):
"""将使用标注精灵标注的分类数据集转换为ImageNet数据集。
"""
def __init__(self):
super(JingLing2ImageNet, self).__init__()
def get_json_info(self, json_file):
json_info = json.load(json_file)
json_info = json_info['outputs']['object']
return json_info
\ No newline at end of file
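The refactor turns the ImageNet converter into a small template: X2ImageNet owns the copy loop, and each tool-specific subclass only overrides get_json_info to pull the label entries out of its own JSON layout (each entry must expose a 'name' key, which the shared loop uses as the class directory). A purely hypothetical further subclass, with an assumed module path, would look like:

import json
from paddlex.tools.x2imagenet import X2ImageNet  # assumed module path for the file above

class MyTool2ImageNet(X2ImageNet):
    """Hypothetical converter for a tool whose JSON keeps labels under 'objects'."""

    def __init__(self):
        super(MyTool2ImageNet, self).__init__()

    def get_json_info(self, json_file):
        # json_file is the already-opened annotation file, matching how
        # EasyData2ImageNet and JingLing2ImageNet consume it.
        json_info = json.load(json_file)
        return json_info['objects']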