Python應用基礎-根據指定文件生成XML

    因項目需要根據指定格式的文件生成XML標註文件,可以方便使用LabelImg打開進行編輯和查看。其原始文件默認使用逗號進行分隔,如下所示:
image.png

  • 第1個值:原始圖片中切圖小文件,以AIpng_x,其中x代表原始圖片的第幾個切圖文件
  • 第2~5值:分別對應於ymin, xmin, ymax, xmax
  • 第6個值:代表對應的標籤標註

    在生成XML文件時,需要對其進行彙總,即將屬於同一個原始文件的切圖小文件的標註彙總到一起,其實現代碼如下所示:

import os
from Logger import MyLogger
from xml.dom.minidom import Document
from collections import defaultdict
import re

class OpeateXML:

    def __init__(self, srcPath: str, targetPath: str, srcFileName: str):
        self._srcPath = srcPath
        self._targetPath = targetPath
        self._srcFileName = srcFileName

    def readSrcFileName(self, fileEncoding="utf8") -> dict:
        data = defaultdict(list)
        s = re.compile("\.AIpng_\d{1,}", re.IGNORECASE)
        srcFileFullPath = os.path.join(self._srcPath, self._srcFileName)
        try:
            with open(srcFileFullPath, mode="r", encoding=fileEncoding, errors="ignore") as fr:
                for content in fr.readlines():
                    data[s.sub(".AIpng",content.strip().split(",")[0])].append(content.strip())
        except Exception as ex:
            MyLogger().error(f"OperateXML:read file error:\n{ex}")
            return {}
        else:
            # data.sort(key=lambda x: x.strip().split(",")[0])
            return data

    def createXML(self, data: dict, fileEncoding="utf8"):
        if data:
            try:
                for k,v in data.items():
                    doc = Document()
                    # 創建根節點
                    rootNode = doc.createElement("annotation")
                    # 添加根節點
                    doc.appendChild(rootNode)

                    folder = doc.createElement("folder")
                    folderText = doc.createTextNode(self._targetPath)
                    folder.appendChild(folderText)
                    rootNode.appendChild(folder)

                    filename = doc.createElement("filename")
                    filenameText = doc.createTextNode(k)
                    filename.appendChild(filenameText)
                    rootNode.appendChild(filename)

                    path = doc.createElement("path")
                    pathText = doc.createTextNode(os.path.join(self._targetPath,k))
                    path.appendChild(pathText)
                    rootNode.appendChild(path)
                    for i in v:
                        tmpData = i.strip().split(",")
                        if len(tmpData) == 6:
                            _, ymin, xmin, ymax, xmax, labelName = tmpData

                            objectObj = doc.createElement("object")
                            rootNode.appendChild(objectObj)

                            objectName = doc.createElement("name")
                            objectNameText = doc.createTextNode(labelName)
                            objectName.appendChild(objectNameText)
                            objectObj.appendChild(objectName)

                            objectBndBox = doc.createElement("bndbox")
                            objectObj.appendChild(objectBndBox)

                            objectBndBoxXmin = doc.createElement("xmin")
                            objectBndBoxYmin = doc.createElement("ymin")
                            objectBndBoxXmax = doc.createElement("xmax")
                            objectBndBoxYmax = doc.createElement("ymax")

                            objectBndBoxXminText = doc.createTextNode(xmin)
                            objectBndBoxYminText = doc.createTextNode(ymin)
                            objectBndBoxXmaxText = doc.createTextNode(xmax)
                            objectBndBoxYmaxText = doc.createTextNode(ymax)

                            objectBndBox.appendChild(objectBndBoxXmin)
                            objectBndBox.appendChild(objectBndBoxYmin)
                            objectBndBox.appendChild(objectBndBoxXmax)
                            objectBndBox.appendChild(objectBndBoxYmax)

                            objectBndBoxXmin.appendChild(objectBndBoxXminText)
                            objectBndBoxYmin.appendChild(objectBndBoxYminText)
                            objectBndBoxXmax.appendChild(objectBndBoxXmaxText)
                            objectBndBoxYmax.appendChild(objectBndBoxYmaxText)

                            objectObj.appendChild(objectBndBox)
                        else:
                            continue

                    # save xml
                    xmlName=os.path.splitext(k)[0]+".xml"
                    targetPath = os.path.join(self._targetPath, xmlName)
                    with open(targetPath, mode="w", encoding=fileEncoding) as fw:
                        doc.writexml(fw, indent="\t", newl="\n", addindent="\t", encoding=fileEncoding)
            except Exception as ex:
                MyLogger().error(f"OperateXML:Save xml error\n{ex}")
                return

if __name__ == '__main__':
    srcPath = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs"
    srcName = "locations.txt"
    targetPath = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs\in_number"
    operateXML = OpeateXML(srcPath, targetPath, srcName)
    a = operateXML.readSrcFileName()
    operateXML.createXML(a)

    最終生成的XML效果如下所示:

image.png

    在LabelImg中的效果如下所示:
image.png

本文地址:https://www.cnblogs.com/surpassme/p/13204899.html

本文同步在微信訂閱號上發佈,如各位小夥伴們喜歡我的文章,也可以關注我的微信訂閱號:woaitest,或掃描下面的二維碼添加關注:
MyQRCode.jpg

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章