提交 e63fbe49 编写于 作者: 锦鲤AI幸运 🎯

修改完成:划分det与rec数据集

上级 373d0553
...@@ -17,15 +17,14 @@ def isCreateOrDeleteFolder(path, flag): ...@@ -17,15 +17,14 @@ def isCreateOrDeleteFolder(path, flag):
return flagAbsPath return flagAbsPath
def splitTrainVal(root, dir, absTrainRootPath, absValRootPath, absTestRootPath, trainTxt, valTxt, testTxt, flag): def splitTrainVal(root, absTrainRootPath, absValRootPath, absTestRootPath, trainTxt, valTxt, testTxt, flag):
# 按照指定的比例划分训练集、验证集、测试集 # 按照指定的比例划分训练集、验证集、测试集
labelPath = os.path.join(root, dir) dataAbsPath = os.path.abspath(root)
labelAbsPath = os.path.abspath(labelPath)
if flag == "det": if flag == "det":
labelFilePath = os.path.join(labelAbsPath, args.detLabelFileName) labelFilePath = os.path.join(dataAbsPath, args.detLabelFileName)
elif flag == "rec": elif flag == "rec":
labelFilePath = os.path.join(labelAbsPath, args.recLabelFileName) labelFilePath = os.path.join(dataAbsPath, args.recLabelFileName)
labelFileRead = open(labelFilePath, "r", encoding="UTF-8") labelFileRead = open(labelFilePath, "r", encoding="UTF-8")
labelFileContent = labelFileRead.readlines() labelFileContent = labelFileRead.readlines()
...@@ -38,9 +37,9 @@ def splitTrainVal(root, dir, absTrainRootPath, absValRootPath, absTestRootPath, ...@@ -38,9 +37,9 @@ def splitTrainVal(root, dir, absTrainRootPath, absValRootPath, absTestRootPath,
imageName = os.path.basename(imageRelativePath) imageName = os.path.basename(imageRelativePath)
if flag == "det": if flag == "det":
imagePath = os.path.join(labelAbsPath, imageName) imagePath = os.path.join(dataAbsPath, imageName)
elif flag == "rec": elif flag == "rec":
imagePath = os.path.join(labelAbsPath, "{}\\{}".format(args.recImageDirName, imageName)) imagePath = os.path.join(dataAbsPath, "{}\\{}".format(args.recImageDirName, imageName))
# 按预设的比例划分训练集、验证集、测试集 # 按预设的比例划分训练集、验证集、测试集
trainValTestRatio = args.trainValTestRatio.split(":") trainValTestRatio = args.trainValTestRatio.split(":")
...@@ -90,15 +89,20 @@ def genDetRecTrainVal(args): ...@@ -90,15 +89,20 @@ def genDetRecTrainVal(args):
recValTxt = open(os.path.join(args.recRootPath, "val.txt"), "a", encoding="UTF-8") recValTxt = open(os.path.join(args.recRootPath, "val.txt"), "a", encoding="UTF-8")
recTestTxt = open(os.path.join(args.recRootPath, "test.txt"), "a", encoding="UTF-8") recTestTxt = open(os.path.join(args.recRootPath, "test.txt"), "a", encoding="UTF-8")
for root, dirs, files in os.walk(args.labelRootPath): splitTrainVal(args.datasetRootPath, detAbsTrainRootPath, detAbsValRootPath, detAbsTestRootPath, detTrainTxt, detValTxt,
detTestTxt, "det")
for root, dirs, files in os.walk(args.datasetRootPath):
for dir in dirs: for dir in dirs:
splitTrainVal(root, dir, detAbsTrainRootPath, detAbsValRootPath, detAbsTestRootPath, detTrainTxt, detValTxt, if dir == 'crop_img':
detTestTxt, "det") splitTrainVal(root, recAbsTrainRootPath, recAbsValRootPath, recAbsTestRootPath, recTrainTxt, recValTxt,
splitTrainVal(root, dir, recAbsTrainRootPath, recAbsValRootPath, recAbsTestRootPath, recTrainTxt, recValTxt, recTestTxt, "rec")
recTestTxt, "rec") else:
continue
break break
if __name__ == "__main__": if __name__ == "__main__":
# 功能描述:分别划分检测和识别的训练集、验证集、测试集 # 功能描述:分别划分检测和识别的训练集、验证集、测试集
# 说明:可以根据自己的路径和需求调整参数,图像数据往往多人合作分批标注,每一批图像数据放在一个文件夹内用PPOCRLabel进行标注, # 说明:可以根据自己的路径和需求调整参数,图像数据往往多人合作分批标注,每一批图像数据放在一个文件夹内用PPOCRLabel进行标注,
...@@ -110,9 +114,9 @@ if __name__ == "__main__": ...@@ -110,9 +114,9 @@ if __name__ == "__main__":
default="6:2:2", default="6:2:2",
help="ratio of trainset:valset:testset") help="ratio of trainset:valset:testset")
parser.add_argument( parser.add_argument(
"--labelRootPath", "--datasetRootPath",
type=str, type=str,
default="../train_data/label", default="../train_data/",
help="path to the dataset marked by ppocrlabel, E.g, dataset folder named 1,2,3..." help="path to the dataset marked by ppocrlabel, E.g, dataset folder named 1,2,3..."
) )
parser.add_argument( parser.add_argument(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册