我正在尝试根据 csv 中的列替换 xml 文件里的类名。这些 xml 文件实际上是标注(annotation)文件。
这是 xml 的格式:
<annotation>
<folder>./test_xmls</folder>
<filename>000048_Panorama.jpg</filename>
<path>./images000048_Panorama.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>4000</width>
<height>2000</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>AAAA</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
我的 csv 包含 `original` 列和 `change to` 列。
格式为:
|original | change to|
--------------------------
| AAAA | class_A |
..................
csv 有 20000 多行,涵盖了 80000 个 xml 文件中出现的所有类名(即 <name>AAAA</name> 这样的值)。
我想将 xml 中的名称(如 `AAAA`)与 csv 的 `original` 列进行匹配。如果它存在于 `original` 列中,就用 `change to` 列中的相应值替换它,例如把 `AAAA` 替换为 `class_A`。
我尝试编写了 python 代码,但它不起作用。我的代码如下:
import xml.etree.ElementTree as ET
import os
import pandas as pd
from collections import defaultdict
import csv
from csv import reader
# The asker's original attempt, reproduced as posted (indentation was lost in
# this copy). The surrounding text states that this code does not work.
# Step 1: build a lookup dict from the rows of table.csv.
with open('table.csv', mode='r') as inp:
# NOTE(review): this rebinds the name `reader` imported from csv above.
reader = csv.reader(inp)
# NOTE(review): values come from rows[2], i.e. a third column; the table shown
# in the question appears to have only two columns -- confirm the index.
dict_from_csv = {rows[0]:rows[2] for rows in reader}
#print(dict_from_csv)
# Step 2: walk every entry of ./xmls in sorted order and parse it as XML.
root_path = "./xmls"
xml_list = sorted(os.listdir(root_path))
for xml_file in xml_list:
xml_path = os.path.join(root_path,xml_file)
# parse xml file
tree = ET.parse(xml_path)
# get root node
root = tree.getroot()
for member in root.findall('object'):
# Text of the first child of each <object> (the <name> element in the sample
# shown in the question).
sub_child = member[0].text
print(sub_child)
for key, value in dict_from_csv.items():
# NOTE(review): `in` is a substring test against the key, not an exact match.
if sub_child in key:
# NOTE(review): this only rebinds the local string `sub_child`; nothing is
# assigned back to member[0].text, so the parsed tree is not modified.
sub_child = sub_child.replace(sub_child, value)
#print(xml)
# NOTE(review): xml_file is a file-name string returned by os.listdir, and str
# has no .write() method; the parsed `tree` is never written back to xml_path.
xml_file.write(sub_child)
print("Classes are changed : " + xml_path)
任何帮助将不胜感激。
谢谢
答案 0 :(得分:1)
以下代码应该可以满足您的要求:
import lxml.html # check https://pypi.org/project/lxml/
from csv import reader
from os.path import exists
import glob
# Parsed conversion tables keyed by CSV path, so that processing thousands of
# XML files reads each CSV only once.
_CONVERSIONS_CACHE = {}


def _load_conversions(csv_path: str) -> dict:
    """Return the ``{original: change_to}`` mapping read from *csv_path*.

    The first row is treated as a header and skipped. Rows with fewer than two
    cells (for example blank lines) are ignored, and surrounding whitespace is
    stripped from both cells. The result is cached per path.
    """
    cached = _CONVERSIONS_CACHE.get(csv_path)
    if cached is not None:
        return cached
    mapping = {}
    # newline='' is what the csv module expects for files it reads.
    with open(csv_path, 'r', newline='') as conversions:
        csv_reader = reader(conversions)
        next(csv_reader, None)  # skip the header row
        for row in csv_reader:
            if len(row) < 2:
                continue
            mapping[row[0].strip()] = row[1].strip()
    _CONVERSIONS_CACHE[csv_path] = mapping
    return mapping


def update_xml(path: str, conversions_path: str = './conversions.csv') -> None:
    """Rename the classes of one annotation file according to the CSV table.

    Every ``<name>`` element whose text equals a value of the CSV's first
    column gets its text replaced by the matching value of the second column.
    Each name is looked up exactly once, so replacements are never chained.
    The file is rewritten in place, and only when at least one name changed.

    Args:
        path: Path of the XML annotation file to update in place.
        conversions_path: Path of the two-column CSV (header row first) that
            maps original class names to their replacements.

    Raises:
        FileNotFoundError: If *path* or *conversions_path* does not exist.
        xml.etree.ElementTree.ParseError: If *path* is not well-formed XML.
    """
    # Local import: this snippet's import block does not provide ElementTree.
    # A real XML parser is used on purpose; an HTML parser may restructure the
    # document (for instance, HTML treats <source> as a void element).
    import xml.etree.ElementTree as ET

    mapping = _load_conversions(conversions_path)
    tree = ET.parse(path)
    changed = False
    for tag in tree.iter('name'):
        original = (tag.text or '').strip()
        # Exact dict lookup: no XPath string is built, so quotes in a class
        # name cannot break the query.
        change_to = mapping.get(original)
        if change_to is None or change_to == original:
            continue
        tag.text = change_to
        changed = True
        print(f'Changed class {original} to {change_to} in {path}')
    if changed:
        # With encoding='utf-8' and the default xml_declaration, no XML
        # declaration is added to the file.
        tree.write(path, encoding='utf-8')
    print(f'Processing on {path} done')
if __name__ == '__main__':
    # Apply the class-name conversion to every *.xml file in the current
    # working directory.
    matched_paths = glob.glob('*.xml')
    for matched_path in matched_paths:
        # Only process paths that exist on disk.
        if not exists(matched_path):
            continue
        update_xml(path=matched_path)
annotation.xml:
<annotation>
<folder>./test_xmls</folder>
<filename>000048_Panorama.jpg</filename>
<path>./images000048_Panorama.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>4000</width>
<height>2000</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>AAAA</name>
<name>BBBB</name>
<name>CCCC</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox></bndbox></object></annotation>
conversions.csv:
original,change to
AAAA,class_A
BBBB,class_B
CCCC,class_C
答案 1 :(得分:0)
import lxml.html # check https://pypi.org/project/lxml/
from csv import reader
import xml.etree.ElementTree as ET
if __name__ == '__main__':
    # Rename <name> classes in every XML file under ./xmls using table.csv.
    import os  # local import: `os` is not among this snippet's imports

    # Read the CSV once into a dict. A csv reader is a one-shot iterator, so
    # it cannot be re-walked for each XML file, and the file is closed when
    # the `with` block ends.
    class_map = {}
    with open('./table.csv', 'r', newline='') as conversions:
        csv_reader = reader(conversions)
        next(csv_reader, None)  # skip the header row
        for row in csv_reader:
            if len(row) < 2:  # ignore blank or incomplete rows
                continue
            class_map[row[0].strip()] = row[1].strip()

    root_path = "./xmls"
    for xml_file in sorted(os.listdir(root_path)):
        if not xml_file.lower().endswith('.xml'):
            continue  # skip anything in the folder that is not an XML file
        xml_path = os.path.join(root_path, xml_file)
        # parse xml file
        tree = ET.parse(xml_path)
        changed = False
        # ElementTree has no .xpath(); iterate over the <name> elements and
        # look each one up in the dict instead.
        for tag in tree.iter('name'):
            original_col = (tag.text or '').strip()
            change_to = class_map.get(original_col)
            if change_to is None or change_to == original_col:
                continue
            tag.text = change_to
            changed = True
            print(f'Changed class {original_col} to {change_to}')
        if changed:
            # write() takes the destination path, not the serialized content.
            tree.write(xml_path, encoding='utf-8')