xml解析
xml文檔
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2023</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2026</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2026</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
code
from xml.etree.ElementTree import parse
# Demo: parse demo.xml and query it with ElementTree's XPath subset.
#
# Open in binary mode so the XML parser decodes the bytes itself, honouring
# the encoding declared in the document instead of the platform locale.
with open('demo.xml', 'rb') as f:
    et = parse(f)
print(et)
root = et.getroot()
# root.text is None when the first child directly follows the start tag
# (no whitespace in between), so fall back to '' before stripping.
print(root, root.tag, root.attrib, (root.text or '').strip())

# A bare tag name matches direct children of root only; grandchildren are
# not found.
print(root.findall('country'))   # list of the <country> elements
print(root.iterfind('country'))  # generator yielding the same elements
print('*' * 80)
for child in root:
    print(child, child.get('name'))
print('*' * 80)
for child in root.iter():
    print(child)  # every element node in the tree

# Supported path syntax:
#   *                  all child elements
#   .                  the current node
#   //                 sub-elements at any depth below the current node
#   ..                 the parent element
#   [@attrib="value"]  elements whose attribute `attrib` equals "value",
#                      e.g. <country name="Liechtenstein">
#   [tag]              elements that have a child named `tag`
#   [tag='text']       elements that have a child `tag` whose text is `text`
#   [position]         e.g. root.findall('country[1]')
print(root.findall('country/*'))
# Starting at root, find <rank> at any depth, then step up to its parent:
# the result is the list of <country> elements.
print(root.findall('.//rank/..'))
print(root.findall('country[@name="Singapore"]'))
print(root.findall('country[rank]'))
print('*' * 80)

# Positional predicates (positions are 1-based).
print(root.findall('country[1]'))         # first <country>
print(root.findall('country[2]'))         # second <country>
print(root.findall('country[last()]'))    # last <country>
print(root.findall('country[last()-1]'))  # second-to-last <country>
構建xml文檔
簡單使用
from xml.etree.ElementTree import Element, ElementTree
from xml.etree.ElementTree import tostring
# Demo: build a small XML tree by hand and write it to demo02.xml.

# Root element; the `name` attribute is supplied at construction time.
e = Element('Data', name='abc')
e.text = '123'  # text content of <Data>
# tostring() emits no '\n' or '\t' (b'<Data name="abc">123</Data>'), which
# makes large documents hard to read.
print(tostring(e))

# Build the <Row><Open>8.80</Open></Row> subtree bottom-up, then attach it.
e3 = Element('Open')
e3.text = '8.80'
e2 = Element('Row')
e2.append(e3)  # <Open> becomes a child of <Row>
e.append(e2)   # <Row> becomes a child of <Data>
print(tostring(e))

# An ElementTree only needs the root element; write() saves it to a file.
et = ElementTree(e)
et.write('demo02.xml')
code
import csv
from xml.etree.ElementTree import ElementTree, Element
def csvtoxml(fname):
    """Convert a CSV file into an ElementTree rooted at <Data>.

    The first CSV record supplies the tag names. Every following record
    becomes a <Row> element whose children are named after the header
    columns and hold the corresponding field as text. Fields beyond the
    header length are dropped because header and row are paired with zip().

    Args:
        fname: Path of the CSV file to read.

    Returns:
        An ElementTree whose root has been passed through pretty() so it is
        indented when written. An empty file yields an empty <Data> root.
    """
    root = Element('Data')
    # newline='' is what the csv module expects: it leaves newline handling
    # to the reader so quoted fields containing line breaks parse correctly.
    with open(fname, 'r', newline='') as f:
        reader = csv.reader(f)
        # Default to an empty header instead of raising StopIteration on an
        # empty file; the row loop below then simply does not run.
        header = next(reader, [])
        for row in reader:
            erow = Element('Row')
            root.append(erow)
            for tag, text in zip(header, row):
                e = Element(tag)
                e.text = text
                erow.append(e)
    pretty(root)
    return ElementTree(root)
# 美化xml格式
def pretty(e, level=0):
    """Indent the tree under *e* in place using newlines and tabs.

    Args:
        e: Element to format; its ``text``/``tail`` (and those of all its
            descendants) are overwritten with whitespace where needed.
        level: Nesting depth of *e*; one tab is emitted per level.
    """
    own_indent = '\n' + '\t' * level
    # Whatever follows this element starts on a new line at this depth.
    e.tail = own_indent
    if len(e) == 0:
        return
    # The first child starts on a new line, one level deeper.
    e.text = own_indent + '\t'
    for node in e:
        pretty(node, level + 1)
    # The last child is followed by e's closing tag, which must sit at
    # e's own depth rather than the children's depth.
    node.tail = own_indent
# Convert pingan.csv to an indented XML document and save it as pingan.xml.
et = csvtoxml(fname='pingan.csv')
et.write(file_or_filename='pingan.xml')