提交 a237df88 编写于 作者: M mamingjie-China

add tools/split.py

上级 0efac7f7
......@@ -15,6 +15,7 @@
from six import text_type as _text_type
import argparse
import sys
import os.path as osp
import paddlex.utils.logging as logging
......@@ -85,6 +86,30 @@ def arg_parser():
"-fs",
default=None,
help="export inference model with fixed input shape:[w,h]")
parser.add_argument(
"--split_dataset",
"-sd",
action="store_true",
default=False,
help="split dataset with the split value")
parser.add_argument(
"--form", "-f", default=None, help="define dataset format")
parser.add_argument(
"--dataset_dir",
"-dd",
type=_text_type,
default=None,
help="define the path of dataset to be splited")
parser.add_argument(
"--val_value",
"-vv",
default=None,
help="define the value of validation dataset")
parser.add_argument(
"--test_value",
"-tv",
default=None,
help="define the value of test dataset")
return parser
......@@ -135,7 +160,7 @@ def main():
"paddlex --export_inference --model_dir model_path --save_dir infer_model"
)
pdx.convertor.export_onnx_model(model, args.save_dir)
if args.data_conversion:
assert args.source is not None, "--source should be defined while converting dataset"
assert args.to is not None, "--to should be defined to confirm the taregt dataset format"
......@@ -150,9 +175,35 @@ def main():
logging.error(
"The jingling dataset can not convert to the PascalVOC dataset.",
exit=False)
pdx.tools.convert.dataset_conversion(args.source, args.to,
args.pics, args.annotations, args.save_dir )
pdx.tools.convert.dataset_conversion(args.source, args.to, args.pics,
args.annotations, args.save_dir)
if args.split_dataset:
assert args.dataset_dir is not None, "--dataset_dir should be defined while spliting dataset"
assert args.form is not None, "--form should be defined while spliting dataset"
assert args.val_value is not None, "--val_value should be defined while spliting dataset"
dataset_dir = args.dataset_dir
dataset_form = args.form.lower()
val_value = float(args.val_value)
test_value = float(args.test_value
if args.test_value is not None else 0)
save_dir = args.save_dir if args.save_dir is not None else "./"
if not dataset_form in ["coco", "imagenet", "voc", "seg"]:
logging.error(
"The dataset form is not correct defined.(support COCO/ImageNet/VOC/Seg)"
)
if not osp.exists(dataset_dir):
logging.error("The path of dataset to be splited doesn't exist.")
if val_value <= 0 or val_value >= 1 or test_value < 0 or test_value >= 1 or val_value + test_value >= 1:
logging.error("The value of split is not correct.")
if not osp.exists(save_dir):
logging.error("The path of saved split information doesn't exist.")
print(11111111111111)
pdx.tools.split.dataset_split(dataset_dir, dataset_form, val_value,
test_value, save_dir)
print(222222222)
if __name__ == "__main__":
......
......@@ -14,4 +14,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .convert import *
\ No newline at end of file
from .convert import *
from .split import *
#!/usr/bin/env python
# coding: utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def dataset_split(dataset_dir, dataset_form, val_value, test_value, save_dir):
    """Placeholder entry point for splitting a dataset.

    In this revision the function does not split anything: it only echoes
    the arguments it received to stdout, separated by single spaces, and
    returns ``None``.

    Args:
        dataset_dir: Value forwarded as the dataset location
            (presumably a directory path -- confirm against the caller).
        dataset_form: Value forwarded as the dataset format name
            (presumably one of "coco", "imagenet", "voc", "seg" -- confirm
            against the caller).
        val_value: Value forwarded as the validation split amount
            (presumably a fraction in (0, 1) -- confirm against the caller).
        test_value: Value forwarded as the test split amount
            (presumably a fraction in [0, 1) -- confirm against the caller).
        save_dir: Value forwarded as the output location for the split
            information (presumably a directory path -- confirm).

    Returns:
        None.
    """
    # TODO(review): the real splitting logic is still missing; only the
    # argument echo is kept so callers can see the call was reached.
    print(dataset_dir, dataset_form, val_value, test_value, save_dir)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册