Example XML document (the structure `readXml` below iterates over):
<?xml version="1.0"?> <data> <country name="Liechtenstein"> <rank>1</rank> <year>2008</year> <gdppc>141100</gdppc> <neighbor name="Austria" direction="E"/> <neighbor name="Switzerland" direction="W"/> </country> <country name="Singapore"> <rank>4</rank> <year>2011</year> <gdppc>59900</gdppc> <neighbor name="Malaysia" direction="N"/> </country> <country name="Panama"> <rank>68</rank> <year>2011</year> <gdppc>13600</gdppc> <neighbor name="Costa Rica" direction="W"/> <neighbor name="Colombia" direction="E"/> </country> </data>
# encoding: utf-8
# Copyright 2023 Geovin Du Ltd. (涂聚文有限公司)
# License: see project license information
# Description: pip install beautifulsoup4
# Author   : geovindu, Geovin Du
# IDE      : PyCharm 2023.1, Python 3.11
# Datetime : 2023/7/16 22:17
# File     : XmlHelper.py
# Explain  : learning / study code
"""XML helper utilities.

Reads and writes XML documents, downloads an RSS feed, parses the feed
items, and exports them to CSV.
"""

from xml.dom import minidom
import xml.etree.ElementTree as ET
import csv
import os
import sys


def readXml(url):
    """Parse the XML file at *url* and print each top-level child's tag and attributes.

    :param url: path to an XML file (e.g. the <data>/<country> sample document).
    """
    tree = ET.parse(url)
    root = tree.getroot()
    for child in root:
        print(child.tag, child.attrib)


def writeXml(url):
    """Build a small DOM document and serialize it to *url* as UTF-8 XML.

    The document written is: <root><movie shelf="New Arrivals"><type>War, Thriller</type></movie></root>

    :param url: destination file path.
    """
    # Instantiate the Document tree; XML requires a single root element.
    doc = minidom.Document()
    root_node = doc.createElement('root')
    doc.appendChild(root_node)

    # Child element with an attribute.
    movie_node = doc.createElement('movie')
    root_node.appendChild(movie_node)
    movie_node.setAttribute('shelf', 'New Arrivals')

    # Second-level child; DOM treats the text content as a child text node.
    type_node = doc.createElement('type')
    movie_node.appendChild(type_node)
    type_node.appendChild(doc.createTextNode("War, Thriller"))

    try:
        with open(url, 'w', encoding='UTF-8') as f:
            # First argument is the target file object.
            doc.writexml(f, indent='', addindent='\t', newl='\n', encoding='UTF-8')
    except Exception as e:
        print('错误:', e)


def loadRSS():
    """Download the Hindustan Times top-news RSS feed and save it as topnewsfeed.xml."""
    # requests is a third-party package; import it locally so the parsing
    # helpers in this module remain usable without requests installed.
    import requests

    url = 'http://www.hindustantimes.com/rss/topnews/rssfeed.xml'
    resp = requests.get(url)
    # Save the raw bytes so the declared XML encoding is preserved.
    with open('topnewsfeed.xml', 'wb') as f:
        f.write(resp.content)


def parseXML(xmlfile):
    """Parse an RSS feed file and return its items as a list of dicts.

    :param xmlfile: path to an RSS XML file with a channel/item structure.
    :return: list of dicts, one per <item>, mapping child tag -> text
             (the media:content URL is stored under the key 'media').
    """
    tree = ET.parse(xmlfile)
    root = tree.getroot()

    newsitems = []
    for item in root.findall('./channel/item'):
        news = {}
        for child in item:
            # Special-case the namespaced media:content element — its payload
            # is the 'url' attribute, not text content.
            if child.tag == '{http://search.yahoo.com/mrss/}content':
                news['media'] = child.attrib['url']
            else:
                # child.text is None for empty elements; store '' instead of
                # crashing, and keep str (encoding to bytes here would make
                # csv write b'...' literals).
                news[child.tag] = child.text or ''
        newsitems.append(news)

    return newsitems


def savetoCSV(newsitems, filename):
    """Write parsed news items to a CSV file.

    :param newsitems: list of dicts as produced by parseXML().
    :param filename: destination CSV path.
    """
    fields = ['guid', 'title', 'pubDate', 'description', 'link', 'media']

    # newline='' is required by the csv module to avoid blank rows on Windows;
    # extrasaction='ignore' skips feed tags outside the fixed field list
    # instead of raising ValueError.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(newsitems)


def main():
    """Download the RSS feed, parse it, and export the items to geovindu.csv."""
    # Load RSS from the web to update the existing XML file.
    loadRSS()
    # Parse the downloaded XML file.
    newsitems = parseXML('topnewsfeed.xml')
    # Store the news items in a CSV file.
    savetoCSV(newsitems, 'geovindu.csv')
标签:xml,python,doc,xmlhelper,url,child,news,root From: https://www.cnblogs.com/geovindu/p/17558751.html