"""Extract bounding boxes from XML annotation files and merge them into one CSV.

Tags: xml, file, elif, return, damage, xmax, csv, type
From: https://blog.csdn.net/weixin_45503872/article/details/142819788
"""
import os
import csv
import xml.dom.minidom
import glob


# Damage-type name (as it appears in the <name> tag) -> numeric damage code.
_DAMAGE_CODES = {
    "bolt": 30,
    "bolt crack": 31,
    "head_unknow_1": 501,
    "head_unknow_2": 502,
    "head_unknow_3": 503,
    "joint": 600,
    "joint_1": 601,
    "joint_3": 602,
    "jietou": 300,
    "phantom wave": 504,
    "linehole": 32,
    "bolt_loss_white": 701,
    "bolt_loss_red": 702,
    "bolt_crack_h": 703,
    "head crack": 505,
    "bottom crack": 800,
    "badMidBolt": 704,
    "hanfengMidFlaw": 900,
    "unknown_45_white": 705,
    "unknown_45_red": 706,
}


def to_int(str_value):
    """Convert a numeric string to an int, truncating any fractional part.

    Args:
        str_value: Text such as ``"12"`` or ``"12.7"``.

    Returns:
        The integer value, or None when the input is None, is not numeric,
        or is not representable as an int (``"inf"``, ``"nan"``).
    """
    if str_value is None:
        return None
    try:
        return int(str_value)
    except (TypeError, ValueError):
        pass
    try:
        # int(float("inf")) raises OverflowError, int(float("nan")) ValueError.
        return int(float(str_value))
    except (TypeError, ValueError, OverflowError):
        return None


def get_damage_code(damage_type):
    """Return the numeric code for a damage-type name.

    Args:
        damage_type (str): Damage-type name.

    Returns:
        int: The matching damage code, or None if the name is unknown.
    """
    return _DAMAGE_CODES.get(damage_type)


def _first_text(node, tag):
    """Return the text of the first ``tag`` descendant of ``node``, or None."""
    elements = node.getElementsByTagName(tag)
    if not elements:
        return None
    first_child = elements[0].firstChild
    # An empty element has no child; a non-text child has no ``data``.
    return getattr(first_child, "data", None)


def xml_to_csv(xml_files, csv_path, gap=3000):
    """Write the boxes of all ``<object>`` nodes in ``xml_files`` to one CSV.

    Files are laid out side by side along x: boxes of every file after the
    first one that produced boxes are shifted right by the largest (shifted)
    ``xmax`` written so far plus ``gap``.

    Args:
        xml_files: Iterable of XML file paths, processed in the given order.
        csv_path: Path of the CSV file to create (overwritten if present).
        gap: Horizontal spacing inserted between consecutive files.

    Values that are missing or not numeric are written as empty cells, as are
    damage types that ``get_damage_code`` does not know.
    """
    last_max_xmax = 0  # largest shifted xmax written by earlier files

    with open(csv_path, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(['xmin', 'ymin', 'xmax', 'ymax', 'Injury'])

        for xml_file in xml_files:
            dom = xml.dom.minidom.parse(xml_file)
            root = dom.documentElement

            # No shift until some earlier file has actually produced a box.
            offset = last_max_xmax + gap if last_max_xmax > 0 else 0
            local_max_xmax = 0  # largest shifted xmax within this file

            for obj in root.getElementsByTagName('object'):
                xmin = to_int(_first_text(obj, 'xmin'))
                ymin = to_int(_first_text(obj, 'ymin'))
                xmax = to_int(_first_text(obj, 'xmax'))
                ymax = to_int(_first_text(obj, 'ymax'))
                injury = get_damage_code(_first_text(obj, 'name'))

                # Shift only values that parsed; None stays None (empty cell)
                # instead of raising TypeError on the addition.
                if xmin is not None:
                    xmin += offset
                if xmax is not None:
                    xmax += offset
                    local_max_xmax = max(local_max_xmax, xmax)
                print(local_max_xmax)

                csvwriter.writerow([xmin, ymin, xmax, ymax, injury])

            # A file without usable boxes must not reset the running offset,
            # otherwise the next file would overlap the earlier ones.
            last_max_xmax = max(last_max_xmax, local_max_xmax)


def get_xml_files(path):
    """Return the ``*.xml`` files directly inside ``path``, sorted by name.

    glob yields files in arbitrary order; sorting keeps the x offsets computed
    by ``xml_to_csv`` reproducible between runs.
    """
    return sorted(glob.glob(os.path.join(path, '*.xml')))


if __name__ == '__main__':
    xml_path = 'F:\\png'
    csv_path = 'F:\\testcsv\\6.csv'
    xml_files = get_xml_files(xml_path)
    xml_to_csv(xml_files, csv_path)