首页 > 其他分享 >xml数据转yolov5的txt代码

xml数据转yolov5的txt代码

时间:2022-12-14 17:24:49浏览次数:47  
标签:xml yolov5 name cat lst str txt root

将 labelImg 标注生成的 xml 文件转换为 YOLOv5 可训练的 txt 标签格式的代码,包含读取 xml 文件的代码和保存 txt 文件的代码。

 

 


from lxml.etree import Element, SubElement, tostring, ElementTree
from xml.dom.minidom import parseString
import xml.etree.ElementTree as ET
import os

def get_root_lst(root, suffix='jpg', suffix_n=3):
    """Recursively collect the files under ``root`` whose name ends with ``suffix``.

    A file matches when its last ``suffix_n`` characters equal ``suffix``
    (case-sensitive), so ``suffix_n`` is expected to be ``len(suffix)``.

    :param root: directory walked with ``os.walk``.
    :param suffix: trailing characters a file name must have.
    :param suffix_n: number of trailing characters compared against ``suffix``.
    :return: ``(root_lst, name_lst)`` -- full paths and bare file names,
        aligned by index.
    """
    root_lst, name_lst = [], []
    for dir_path, _dir_names, file_names in os.walk(root):
        for name in file_names:
            if name[-suffix_n:] == suffix:
                root_lst.append(os.path.join(dir_path, name))
                name_lst.append(name)
    return root_lst, name_lst


def read_xml(xml_root):
    """Read the annotation stored in one ``.xml`` file.

    :param xml_root: path of the ``.xml`` file.
    :return: dict with four index-aligned lists, one entry per ``object`` node:
        ``'cat'`` (class name), ``'bboxes'`` (``[x1, y1, x2, y2]`` as ints),
        ``'box_wh'`` (``[w, h]`` computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``)
        and ``'whd'`` (``[width, height, depth]`` of the ``size`` node, kept
        as the raw text). All lists are empty when the file has no ``object``
        node or when the path does not end in ``.xml``.
    """
    dict_info = {'cat': [], 'bboxes': [], 'box_wh': [], 'whd': []}
    if os.path.splitext(xml_root)[-1] != '.xml':
        print('[inexistence]:{} suffix is not xml '.format(xml_root))
        return dict_info

    root = ET.parse(xml_root).getroot()  # parse the file and take its root node
    size = root.find('size')
    whd = [size.find('width').text, size.find('height').text, size.find('depth').text]
    for obj in root.findall('object'):  # every "object" node under the root
        cat = str(obj.find('name').text)  # class name of this object
        bbox = obj.find('bndbox')
        x1, y1, x2, y2 = [int(bbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        dict_info['cat'].append(cat)
        dict_info['bboxes'].append([x1, y1, x2, y2])
        dict_info['box_wh'].append([x2 - x1 + 1, y2 - y1 + 1])
        # The image size is repeated per object so all four lists stay aligned.
        dict_info['whd'].append(whd)
    return dict_info


def build_dir(root):
    """Create directory ``root`` (including parents) if needed and return it."""
    os.makedirs(root, exist_ok=True)
    return root


# Save a txt file
def write_txt(text_lst, out_txt=None):
    """Write every item of ``text_lst`` as one line of a UTF-8 text file.

    :param text_lst: iterable of items; each one is converted with ``str``.
    :param out_txt: output path; ``'classes.txt'`` is used when it is ``None``.
    """
    out_path = out_txt if out_txt is not None else 'classes.txt'
    # ``with`` closes the file even if a write fails part-way through.
    with open(out_path, 'w', encoding='utf-8') as file_write_obj:
        for text in text_lst:
            file_write_obj.write(str(text))
            file_write_obj.write('\n')


def get_str_name(root_str):
    """Return the last component of ``root_str``, splitting on both ``\\`` and ``/``."""
    if '\\' in root_str:
        root_str = root_str.split('\\')[-1]
    if '/' in root_str:
        root_str = root_str.split('/')[-1]
    return root_str


def _progress(iterable):
    """Wrap ``iterable`` in a tqdm progress bar when tqdm is installed."""
    try:
        from tqdm import tqdm
    except ImportError:
        return iterable
    return tqdm(iterable)


def xml2yolotxt(xml_root, img_root=None, out_dir=None, cat_name_lst=None):
    """Convert every ``.xml`` annotation under ``xml_root`` to a YOLO txt label file.

    Each output line is ``class x_center y_center width height`` with the four
    box values divided by the image width / height. Annotation files without
    any object are skipped and produce no txt file.

    :param xml_root: directory searched recursively for ``.xml`` files.
    :param img_root: optional directory searched recursively for ``.jpg``
        files. When given, the image size is read from the image that has the
        same file name as the xml (requires ``cv2``); otherwise the size
        recorded in the xml is used.
    :param out_dir: directory receiving the txt files; defaults to
        ``<xml_root>/out_dir_labels``.
    :param cat_name_lst: optional list of class names, e.g.
        ``['pedes', 'elec', 'car', 'truck', 'bus', 'tricycle']``; a class is
        written as its index in this list (``pedes`` -> 0, ``elec`` -> 1, ...).
        When ``None`` the class text found in the xml is written unchanged.
    :raises ValueError: if an xml class is missing from ``cat_name_lst``, or
        the matching image is missing or unreadable.
    """
    xml_root_lst, xml_names_lst = get_root_lst(xml_root, suffix='xml', suffix_n=3)
    if out_dir is None:
        out_dir = os.path.join(xml_root, 'out_dir_labels')
    out_dir = build_dir(out_dir)

    # Build the lookups once instead of scanning the lists for every xml file.
    # ``setdefault`` keeps the first occurrence, like ``list.index`` would.
    img_path_by_name = {}
    if img_root is not None:
        import cv2  # imported lazily: only needed when image sizes are read
        img_root_lst, img_names_lst = get_root_lst(img_root, suffix='jpg', suffix_n=3)
        for img_path, img_name in zip(img_root_lst, img_names_lst):
            img_path_by_name.setdefault(img_name, img_path)

    cat_index = None
    if cat_name_lst is not None:
        cat_index = {}
        for idx, cat_name in enumerate(cat_name_lst):
            cat_index.setdefault(cat_name, idx)

    for xml_path, xml_name in _progress(list(zip(xml_root_lst, xml_names_lst))):
        xml_info = read_xml(xml_path)
        boxes_lst = xml_info['bboxes']
        if not boxes_lst:
            # No objects: nothing to write, and 'whd' is empty as well.
            continue

        if cat_index is None:
            cat_lst = xml_info['cat']  # classes are assumed to be numeric already
        else:
            unknown = [c for c in xml_info['cat'] if c not in cat_index]
            if unknown:
                raise ValueError(
                    '{}: class {!r} is not in cat_name_lst'.format(xml_path, unknown[0]))
            cat_lst = [cat_index[c] for c in xml_info['cat']]

        if img_root is not None:
            # Take W and H from the image itself.
            img_name = xml_name[:-3] + 'jpg'
            if img_name not in img_path_by_name:
                raise ValueError('{}: image {} not found under {}'.format(
                    xml_path, img_name, img_root))
            img = cv2.imread(img_path_by_name[img_name])
            if img is None:
                raise ValueError('cannot read image {}'.format(img_path_by_name[img_name]))
            H, W = img.shape[:2]
        else:
            whd = xml_info['whd'][0]
            W, H = float(whd[0]), float(whd[1])

        yolotxt_lst = []
        for cat, (x1, y1, x2, y2) in zip(cat_lst, boxes_lst):
            bw, bh = x2 - x1, y2 - y1
            x, y = x1 + bw / 2, y1 + bh / 2
            values = (cat, x / W, y / H, bw / W, bh / H)
            yolotxt_lst.append(' '.join(str(v) for v in values))

        txt_name = get_str_name(xml_path)[:-3] + 'txt'
        write_txt(yolotxt_lst, out_txt=os.path.join(out_dir, txt_name))

 

标签:xml,yolov5,name,cat,lst,str,txt,root
From: https://www.cnblogs.com/tangjunjun/p/16982690.html

相关文章

  • 基于XML配置开发AspectJ
    1.pom.xml,添加依赖<?xmlversion="1.0"encoding="UTF-8"?><projectxmlns="http://maven.apache.org/POM/4.0.0"xmlns:xsi="http://www.w3.org/2001/XMLSchema-i......
  • C# XML描述与实例增删改
    概述用于小型项目数据存储,信息传递。系统配置文件等特点必须有根节点(对比HTML)标签只能自定义 (对比HTML)头声明可有可无建议书写<?xmlversion="1.0"encoding=......
  • 【CAD开发】3dxml文件格式开发准备
    文章目录​​1、简介​​​​2、3DXML查看工具​​​​2.13DXMLPlayer​​​​2.2PixyzStudio​​​​3、3DXML开发用户手册​​​​3.1Model_3dxml​​​​3.2Model......
  • TDXML: XML-Based Task Dialogs with a Visual Task Dialog Editor
    TDXML:XML-BasedTaskDialogswithaVisualTaskDialogEditor Downloaddemoproject-228KBContentsIntroductionUsingtheVisualEditorOve......
  • UE4读取本地XML文件
    其实这里读取XML也是利用了Tinyxml来读取xml,主要是讲Tinyxml放在UE4中,遇到的一点点坑1.先给出Tinyxml链接:​​http://www.grinninglizard.com/tinyxml/​​,可以下载他的工......
  • DataSet(DataTable)与XML互转
    usingSystem;usingSystem.Data;usingSystem.IO;usingSystem.Xml;usingSystem.Text;//相应C#代码:privatestringConvertDataTableToXML(Data......
  • 在asp.net中读取XML文件信息的4种方法
    在asp.net中读取XML文件信息的4种方法方法一:使用XML控件<%@PageLanguage="C#"%><html><body><h3><fontface="Verdana">读取XML方法一</font></h3>......
  • C#操作XML
    需要添加的命名空间:usingSystem.Xml;定义几个公共对象:XmlDocumentxmldoc;XmlNodexmlnode;XmlElementxmlelem;1,创建到服务器同名目录下的xml......
  • 记一次 pugixml 编译错误的解决
    记一次pugixml编译错误的解决原总结工具editpadvs编译错误filelocator缘起前一阵子,平台在换基线,底层接口变了很多,因此引出了至少​​20000​​pugixml-compile-error诡......
  • 【问题】缺少web.xml Failed to execute goal org.apache.maven.plugins:maven-war-
     问题:Failedtoexecutegoalorg.apache.maven.plugins:maven-war-plugin::war(default-war)onprojectchange-interface:Executiondefault-warofgoalorg.apache.......