From 7ec4c0e4cb6365543c09cb34225f02fd417648b6 Mon Sep 17 00:00:00 2001 From: Leif <4603009@qq.com> Date: Mon, 14 Dec 2020 15:59:48 +0800 Subject: [PATCH] Delete xml related codes --- PPOCRLabel/PPOCRLabel.py | 64 ++--------- PPOCRLabel/libs/labelFile.py | 152 ------------------------- PPOCRLabel/libs/pascal_voc_io.py | 183 ------------------------------- PPOCRLabel/libs/yolo_io.py | 146 ------------------------ 4 files changed, 8 insertions(+), 537 deletions(-) delete mode 100644 PPOCRLabel/libs/labelFile.py delete mode 100644 PPOCRLabel/libs/pascal_voc_io.py delete mode 100644 PPOCRLabel/libs/yolo_io.py diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index c8e58f99..14e688c5 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -61,7 +61,6 @@ from libs.zoomWidget import ZoomWidget from libs.autoDialog import AutoDialog from libs.labelDialog import LabelDialog from libs.colorDialog import ColorDialog -from libs.labelFile import LabelFile, LabelFileError from libs.toolBar import ToolBar from libs.ustr import ustr from libs.hashableQListWidgetItem import HashableQListWidgetItem @@ -1028,9 +1027,6 @@ class MainWindow(QMainWindow, WindowMixin): def saveLabels(self, annotationFilePath, mode='Auto'): # Mode is Auto means that labels will be loaded from self.result_dic totally, which is the output of ocr model annotationFilePath = ustr(annotationFilePath) - if self.labelFile is None: - self.labelFile = LabelFile() - self.labelFile.verified = self.canvas.verified def format_shape(s): # print('s in saveLabels is ',s) @@ -1065,8 +1061,8 @@ class MainWindow(QMainWindow, WindowMixin): # self.lineColor.getRgb(), self.fillColor.getRgb()) # print('Image:{0} -> Annotation:{1}'.format(self.filePath, annotationFilePath)) return True - except LabelFileError as e: - self.errorMessage(u'Error saving label data', u'%s' % e) + except: + self.errorMessage(u'Error saving label data') return False def copySelectedShape(self): @@ -1258,26 +1254,8 @@ class MainWindow(QMainWindow, WindowMixin): # if unicodeFilePath in self.mImgList: if unicodeFilePath and os.path.exists(unicodeFilePath): - if LabelFile.isLabelFile(unicodeFilePath): - try: - self.labelFile = LabelFile(unicodeFilePath) - except LabelFileError as e: - self.errorMessage(u'Error opening file', - (u"

%s

" - u"

Make sure %s is a valid label file.") - % (e, unicodeFilePath)) - self.status("Error reading %s" % unicodeFilePath) - return False - self.imageData = self.labelFile.imageData - self.lineColor = QColor(*self.labelFile.lineColor) - self.fillColor = QColor(*self.labelFile.fillColor) - self.canvas.verified = self.labelFile.verified - else: - # Load image: - # read data first and store for saving into label file. - self.imageData = read(unicodeFilePath, None) - self.labelFile = None - self.canvas.verified = False + self.imageData = read(unicodeFilePath, None) + self.canvas.verified = False image = QImage.fromData(self.imageData) if image.isNull(): @@ -1289,8 +1267,7 @@ class MainWindow(QMainWindow, WindowMixin): self.image = image self.filePath = unicodeFilePath self.canvas.loadPixmap(QPixmap.fromImage(image)) - if self.labelFile: - self.loadLabels(self.labelFile.shapes) + if self.validFilestate(filePath) is True: self.setClean() else: @@ -1491,23 +1468,6 @@ class MainWindow(QMainWindow, WindowMixin): self.reRecogButton.setEnabled(True) self.actions.saveLabel.setEnabled(True) - def verifyImg(self, _value=False): - # Proceding next image without dialog if having any label - if self.filePath is not None: - try: - self.labelFile.toggleVerify() - except AttributeError: - # If the labelling file does not exist yet, create if and - # re-save it with the verified attribute. - self.saveFile() - if self.labelFile != None: - self.labelFile.toggleVerify() - else: - return - - self.canvas.verified = self.labelFile.verified - self.paintCanvas() - self.saveFile() def openPrevImg(self, _value=False): if len(self.mImgList) <= 0: @@ -1580,18 +1540,10 @@ class MainWindow(QMainWindow, WindowMixin): def saveFile(self, _value=False, mode='Manual'): # Manual mode is used for users click "Save" manually,which will change the state of the image - if self.defaultSaveDir is not None and len(ustr(self.defaultSaveDir)): - if self.filePath: - imgidx = self.getImglabelidx(self.filePath) - self._saveFile(imgidx, mode=mode) + if self.filePath: + imgidx = self.getImglabelidx(self.filePath) + self._saveFile(imgidx, mode=mode) - else: - imgFileDir = os.path.dirname(self.filePath) - imgFileName = os.path.basename(self.filePath) - savedFileName = os.path.splitext(imgFileName)[0] - savedPath = os.path.join(imgFileDir, savedFileName) - self._saveFile(savedPath if self.labelFile - else self.saveFileDialog(removeExt=False), mode=mode) def saveFileAs(self, _value=False): assert not self.image.isNull(), "cannot save empty image" diff --git a/PPOCRLabel/libs/labelFile.py b/PPOCRLabel/libs/labelFile.py deleted file mode 100644 index ebcca266..00000000 --- a/PPOCRLabel/libs/labelFile.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2016 Tzutalin -# Create by TzuTaLin - -try: - from PyQt5.QtGui import QImage -except ImportError: - from PyQt4.QtGui import QImage - -from base64 import b64encode, b64decode -from libs.pascal_voc_io import PascalVocWriter -from libs.yolo_io import YOLOWriter -from libs.pascal_voc_io import XML_EXT -from enum import Enum -import os.path -import sys - - -class LabelFileFormat(Enum): - PASCAL_VOC= 1 - YOLO = 2 - - -class LabelFileError(Exception): - pass - - -class LabelFile(object): - # It might be changed as window creates. By default, using XML ext - # suffix = '.lif' - suffix = XML_EXT - - def __init__(self, filename=None): - self.shapes = () - self.imagePath = None - self.imageData = None - self.verified = False - - def savePascalVocFormat(self, filename, shapes, imagePath, imageData, - lineColor=None, fillColor=None, databaseSrc=None): - imgFolderPath = os.path.dirname(imagePath) - imgFolderName = os.path.split(imgFolderPath)[-1] - imgFileName = os.path.basename(imagePath) - #imgFileNameWithoutExt = os.path.splitext(imgFileName)[0] - # Read from file path because self.imageData might be empty if saving to - # Pascal format - image = QImage() - image.load(imagePath) - imageShape = [image.height(), image.width(), - 1 if image.isGrayscale() else 3] - writer = PascalVocWriter(imgFolderName, imgFileName, - imageShape, localImgPath=imagePath) - writer.verified = self.verified - - for shape in shapes: - points = shape['points'] - label = shape['label'] - # Add Chris - difficult = int(shape['difficult']) - bndbox = LabelFile.convertPoints2BndBox(points) - writer.addBndBox(bndbox[0], bndbox[1], bndbox[2], bndbox[3], label, difficult) - - writer.save(targetFile=filename) - return - - def saveYoloFormat(self, filename, shapes, imagePath, imageData, classList, - lineColor=None, fillColor=None, databaseSrc=None): - imgFolderPath = os.path.dirname(imagePath) - imgFolderName = os.path.split(imgFolderPath)[-1] - imgFileName = os.path.basename(imagePath) - #imgFileNameWithoutExt = os.path.splitext(imgFileName)[0] - # Read from file path because self.imageData might be empty if saving to - # Pascal format - image = QImage() - image.load(imagePath) - imageShape = [image.height(), image.width(), - 1 if image.isGrayscale() else 3] - writer = YOLOWriter(imgFolderName, imgFileName, - imageShape, localImgPath=imagePath) - writer.verified = self.verified - - for shape in shapes: - points = shape['points'] - label = shape['label'] - # Add Chris - difficult = int(shape['difficult']) - bndbox = LabelFile.convertPoints2BndBox(points) - writer.addBndBox(bndbox[0], bndbox[1], bndbox[2], bndbox[3], label, difficult) - - writer.save(targetFile=filename, classList=classList) - return - - def toggleVerify(self): - self.verified = not self.verified - - ''' ttf is disable - def load(self, filename): - import json - with open(filename, 'rb') as f: - data = json.load(f) - imagePath = data['imagePath'] - imageData = b64decode(data['imageData']) - lineColor = data['lineColor'] - fillColor = data['fillColor'] - shapes = ((s['label'], s['points'], s['line_color'], s['fill_color'])\ - for s in data['shapes']) - # Only replace data after everything is loaded. - self.shapes = shapes - self.imagePath = imagePath - self.imageData = imageData - self.lineColor = lineColor - self.fillColor = fillColor - - def save(self, filename, shapes, imagePath, imageData, lineColor=None, fillColor=None): - import json - with open(filename, 'wb') as f: - json.dump(dict( - shapes=shapes, - lineColor=lineColor, fillColor=fillColor, - imagePath=imagePath, - imageData=b64encode(imageData)), - f, ensure_ascii=True, indent=2) - ''' - - @staticmethod - def isLabelFile(filename): - fileSuffix = os.path.splitext(filename)[1].lower() - return fileSuffix == LabelFile.suffix - - @staticmethod - def convertPoints2BndBox(points): - xmin = float('inf') - ymin = float('inf') - xmax = float('-inf') - ymax = float('-inf') - for p in points: - x = p[0] - y = p[1] - xmin = min(x, xmin) - ymin = min(y, ymin) - xmax = max(x, xmax) - ymax = max(y, ymax) - - # Martin Kersner, 2015/11/12 - # 0-valued coordinates of BB caused an error while - # training faster-rcnn object detector. - if xmin < 1: - xmin = 1 - - if ymin < 1: - ymin = 1 - - return (int(xmin), int(ymin), int(xmax), int(ymax)) diff --git a/PPOCRLabel/libs/pascal_voc_io.py b/PPOCRLabel/libs/pascal_voc_io.py deleted file mode 100644 index 6c582397..00000000 --- a/PPOCRLabel/libs/pascal_voc_io.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) <2015-Present> Tzutalin -# Copyright (C) 2013 MIT, Computer Science and Artificial Intelligence Laboratory. Bryan Russell, Antonio Torralba, -# William T. Freeman. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and -# associated documentation files (the "Software"), to deal in the Software without restriction, including without -# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the -# Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT -# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -# SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -#!/usr/bin/env python -# -*- coding: utf8 -*- -import sys -from xml.etree import ElementTree -from xml.etree.ElementTree import Element, SubElement -from lxml import etree -import codecs -from libs.constants import DEFAULT_ENCODING -from libs.ustr import ustr - - -XML_EXT = '.xml' -ENCODE_METHOD = DEFAULT_ENCODING - -class PascalVocWriter: - - def __init__(self, foldername, filename, imgSize,databaseSrc='Unknown', localImgPath=None): - self.foldername = foldername - self.filename = filename - self.databaseSrc = databaseSrc - self.imgSize = imgSize - self.boxlist = [] - self.localImgPath = localImgPath - self.verified = False - - def prettify(self, elem): - """ - Return a pretty-printed XML string for the Element. - """ - rough_string = ElementTree.tostring(elem, 'utf8') - root = etree.fromstring(rough_string) - return etree.tostring(root, pretty_print=True, encoding=ENCODE_METHOD).replace(" ".encode(), "\t".encode()) - # minidom does not support UTF-8 - '''reparsed = minidom.parseString(rough_string) - return reparsed.toprettyxml(indent="\t", encoding=ENCODE_METHOD)''' - - def genXML(self): - """ - Return XML root - """ - # Check conditions - if self.filename is None or \ - self.foldername is None or \ - self.imgSize is None: - return None - - top = Element('annotation') - if self.verified: - top.set('verified', 'yes') - - folder = SubElement(top, 'folder') - folder.text = self.foldername - - filename = SubElement(top, 'filename') - filename.text = self.filename - - if self.localImgPath is not None: - localImgPath = SubElement(top, 'path') - localImgPath.text = self.localImgPath - - source = SubElement(top, 'source') - database = SubElement(source, 'database') - database.text = self.databaseSrc - - size_part = SubElement(top, 'size') - width = SubElement(size_part, 'width') - height = SubElement(size_part, 'height') - depth = SubElement(size_part, 'depth') - width.text = str(self.imgSize[1]) - height.text = str(self.imgSize[0]) - if len(self.imgSize) == 3: - depth.text = str(self.imgSize[2]) - else: - depth.text = '1' - - segmented = SubElement(top, 'segmented') - segmented.text = '0' - return top - - def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult): - bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax} - bndbox['name'] = name - bndbox['difficult'] = difficult - self.boxlist.append(bndbox) - - def appendObjects(self, top): - for each_object in self.boxlist: - object_item = SubElement(top, 'object') - name = SubElement(object_item, 'name') - name.text = ustr(each_object['name']) - pose = SubElement(object_item, 'pose') - pose.text = "Unspecified" - truncated = SubElement(object_item, 'truncated') - if int(float(each_object['ymax'])) == int(float(self.imgSize[0])) or (int(float(each_object['ymin']))== 1): - truncated.text = "1" # max == height or min - elif (int(float(each_object['xmax']))==int(float(self.imgSize[1]))) or (int(float(each_object['xmin']))== 1): - truncated.text = "1" # max == width or min - else: - truncated.text = "0" - difficult = SubElement(object_item, 'difficult') - difficult.text = str( bool(each_object['difficult']) & 1 ) - bndbox = SubElement(object_item, 'bndbox') - xmin = SubElement(bndbox, 'xmin') - xmin.text = str(each_object['xmin']) - ymin = SubElement(bndbox, 'ymin') - ymin.text = str(each_object['ymin']) - xmax = SubElement(bndbox, 'xmax') - xmax.text = str(each_object['xmax']) - ymax = SubElement(bndbox, 'ymax') - ymax.text = str(each_object['ymax']) - - def save(self, targetFile=None): - root = self.genXML() - self.appendObjects(root) - out_file = None - if targetFile is None: - out_file = codecs.open( - self.filename + XML_EXT, 'w', encoding=ENCODE_METHOD) - else: - out_file = codecs.open(targetFile, 'w', encoding=ENCODE_METHOD) - - prettifyResult = self.prettify(root) - out_file.write(prettifyResult.decode('utf8')) - out_file.close() - - -class PascalVocReader: - - def __init__(self, filepath): - # shapes type: - # [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color, difficult] - self.shapes = [] - self.filepath = filepath - self.verified = False - try: - self.parseXML() - except: - pass - - def getShapes(self): - return self.shapes - - def addShape(self, label, bndbox, difficult): - xmin = int(float(bndbox.find('xmin').text)) - ymin = int(float(bndbox.find('ymin').text)) - xmax = int(float(bndbox.find('xmax').text)) - ymax = int(float(bndbox.find('ymax').text)) - points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)] - self.shapes.append((label, points, None, None, difficult)) - - def parseXML(self): - assert self.filepath.endswith(XML_EXT), "Unsupport file format" - parser = etree.XMLParser(encoding=ENCODE_METHOD) - xmltree = ElementTree.parse(self.filepath, parser=parser).getroot() - filename = xmltree.find('filename').text - try: - verified = xmltree.attrib['verified'] - if verified == 'yes': - self.verified = True - except KeyError: - self.verified = False - - for object_iter in xmltree.findall('object'): - bndbox = object_iter.find("bndbox") - label = object_iter.find('name').text - # Add chris - difficult = False - if object_iter.find('difficult') is not None: - difficult = bool(int(object_iter.find('difficult').text)) - self.addShape(label, bndbox, difficult) - return True diff --git a/PPOCRLabel/libs/yolo_io.py b/PPOCRLabel/libs/yolo_io.py deleted file mode 100644 index 216fba38..00000000 --- a/PPOCRLabel/libs/yolo_io.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf8 -*- -import sys -import os -from xml.etree import ElementTree -from xml.etree.ElementTree import Element, SubElement -from lxml import etree -import codecs -from libs.constants import DEFAULT_ENCODING - -TXT_EXT = '.txt' -ENCODE_METHOD = DEFAULT_ENCODING - -class YOLOWriter: - - def __init__(self, foldername, filename, imgSize, databaseSrc='Unknown', localImgPath=None): - self.foldername = foldername - self.filename = filename - self.databaseSrc = databaseSrc - self.imgSize = imgSize - self.boxlist = [] - self.localImgPath = localImgPath - self.verified = False - - def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult): - bndbox = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax} - bndbox['name'] = name - bndbox['difficult'] = difficult - self.boxlist.append(bndbox) - - def BndBox2YoloLine(self, box, classList=[]): - xmin = box['xmin'] - xmax = box['xmax'] - ymin = box['ymin'] - ymax = box['ymax'] - - xcen = float((xmin + xmax)) / 2 / self.imgSize[1] - ycen = float((ymin + ymax)) / 2 / self.imgSize[0] - - w = float((xmax - xmin)) / self.imgSize[1] - h = float((ymax - ymin)) / self.imgSize[0] - - # PR387 - boxName = box['name'] - if boxName not in classList: - classList.append(boxName) - - classIndex = classList.index(boxName) - - return classIndex, xcen, ycen, w, h - - def save(self, classList=[], targetFile=None): - - out_file = None #Update yolo .txt - out_class_file = None #Update class list .txt - - if targetFile is None: - out_file = open( - self.filename + TXT_EXT, 'w', encoding=ENCODE_METHOD) - classesFile = os.path.join(os.path.dirname(os.path.abspath(self.filename)), "classes.txt") - out_class_file = open(classesFile, 'w') - - else: - out_file = codecs.open(targetFile, 'w', encoding=ENCODE_METHOD) - classesFile = os.path.join(os.path.dirname(os.path.abspath(targetFile)), "classes.txt") - out_class_file = open(classesFile, 'w') - - - for box in self.boxlist: - classIndex, xcen, ycen, w, h = self.BndBox2YoloLine(box, classList) - # print (classIndex, xcen, ycen, w, h) - out_file.write("%d %.6f %.6f %.6f %.6f\n" % (classIndex, xcen, ycen, w, h)) - - # print (classList) - # print (out_class_file) - for c in classList: - out_class_file.write(c+'\n') - - out_class_file.close() - out_file.close() - - - -class YoloReader: - - def __init__(self, filepath, image, classListPath=None): - # shapes type: - # [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color, difficult] - self.shapes = [] - self.filepath = filepath - - if classListPath is None: - dir_path = os.path.dirname(os.path.realpath(self.filepath)) - self.classListPath = os.path.join(dir_path, "classes.txt") - else: - self.classListPath = classListPath - - # print (filepath, self.classListPath) - - classesFile = open(self.classListPath, 'r') - self.classes = classesFile.read().strip('\n').split('\n') - - # print (self.classes) - - imgSize = [image.height(), image.width(), - 1 if image.isGrayscale() else 3] - - self.imgSize = imgSize - - self.verified = False - # try: - self.parseYoloFormat() - # except: - # pass - - def getShapes(self): - return self.shapes - - def addShape(self, label, xmin, ymin, xmax, ymax, difficult): - - points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)] - self.shapes.append((label, points, None, None, difficult)) - - def yoloLine2Shape(self, classIndex, xcen, ycen, w, h): - label = self.classes[int(classIndex)] - - xmin = max(float(xcen) - float(w) / 2, 0) - xmax = min(float(xcen) + float(w) / 2, 1) - ymin = max(float(ycen) - float(h) / 2, 0) - ymax = min(float(ycen) + float(h) / 2, 1) - - xmin = int(self.imgSize[1] * xmin) - xmax = int(self.imgSize[1] * xmax) - ymin = int(self.imgSize[0] * ymin) - ymax = int(self.imgSize[0] * ymax) - - return label, xmin, ymin, xmax, ymax - - def parseYoloFormat(self): - bndBoxFile = open(self.filepath, 'r') - for bndBox in bndBoxFile: - classIndex, xcen, ycen, w, h = bndBox.strip().split(' ') - label, xmin, ymin, xmax, ymax = self.yoloLine2Shape(classIndex, xcen, ycen, w, h) - - # Caveat: difficult flag is discarded when saved as yolo format. - self.addShape(label, xmin, ymin, xmax, ymax, False) -- GitLab