我的abc.txt文件如下:
1
76 45 146 87
这是我在xyz.xml文件中显示的预期输出:
-<root>
-<object>
- <label>1</label>
</object>
-<cordinates>
<xmin>76</xmin>
<ymin>45</ymin>
<xmin>146</xmin>
<xmax>87</xmax>
</cordinates>
</root>
这是我尝试过的源代码:
# Hand-built XML for one hard-coded annotation (label "1", box 76 45 146 87).
# xml.etree.cElementTree was removed in Python 3.9; ElementTree is the
# supported module and already uses the C accelerator when available.
import xml.etree.ElementTree as ET

root = ET.Element("root")

# Named object_node so the builtin `object` is not shadowed.
object_node = ET.SubElement(root, "object")
ET.SubElement(object_node, "label").text = "1"

cordinates = ET.SubElement(root, "cordinates")
# NOTE(review): the tag sequence xmin/ymin/xmin/xmax reproduces the expected
# output shown in the question; xmin/ymin/xmax/ymax was probably intended.
ET.SubElement(cordinates, "xmin").text = "76"
ET.SubElement(cordinates, "ymin").text = "45"
ET.SubElement(cordinates, "xmin").text = "146"
ET.SubElement(cordinates, "xmax").text = "87"

tree = ET.ElementTree(root)
tree.write("xyz.xml")
但是问题是,我已经完成了标注并手动放置了坐标。我想在该程序中提取abc.txt文件,并自动对350多个.txt文件进行处理。那么有人可以帮我重写代码吗?任何帮助将不胜感激。谢谢!
答案 0 :(得分:0)
此代码将读取对象名称和坐标并存入字典,然后遍历字典以创建所需的相应元素。
# Read a label line and a coordinate line from every listed text file and
# write all of them into a single xyz.xml document.
# xml.etree.cElementTree was removed in Python 3.9; use ElementTree instead.
import xml.etree.ElementTree as ET

file_list = ['abc.txt', 'def.txt']

# Maps file name -> (label, [four coordinate strings]). Keyed by file name
# rather than by label so that files sharing a label do not overwrite each
# other.
obj_coord = {}
for file_name in file_list:
    # `with` closes the file even when the entry is skipped below.
    with open(file_name, "r") as f:
        label = f.readline().strip()          # drop the trailing newline
        coord_list = f.readline().split()
    if len(coord_list) != 4:
        # Missing or malformed coordinate line: skip this file only and keep
        # processing the remaining ones.
        continue
    obj_coord[file_name] = (label, coord_list)

root = ET.Element("root")
# NOTE(review): the tag sequence xmin/ymin/xmin/xmax reproduces the expected
# output shown in the question; xmin/ymin/xmax/ymax was probably intended.
coord_tags = ("xmin", "ymin", "xmin", "xmax")
for label, coord in obj_coord.values():
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label
    cordinates = ET.SubElement(root, "cordinates")
    for tag, value in zip(coord_tags, coord):
        ET.SubElement(cordinates, tag).text = value

tree = ET.ElementTree(root)
tree.write("xyz.xml")
答案 1 :(得分:0)
您可以首先创建一个函数,该函数接收txt_file和xml_file的文件名作为输入,然后像这样将txt_file的内容写入xml_file:
# xml.etree.cElementTree was removed in Python 3.9; use ElementTree instead.
import xml.etree.ElementTree as ET


def write_xml(txt_file, xml_file):
    """Convert one annotation text file into an XML file.

    The text file must contain a label line followed by a line with four
    whitespace-separated coordinates. Blank lines are ignored.

    Args:
        txt_file: Path of the annotation text file to read.
        xml_file: Path of the XML file to write.

    Raises:
        ValueError: If the file does not hold exactly one label line and one
            coordinate line, or the coordinate line does not have four values.
    """
    # Names of the coordinate elements, in the order the values appear.
    coords_list = ['xmin', 'ymin', 'xmax', 'ymax']

    # Strip newlines and drop blank lines so a trailing empty line does not
    # break the two-value unpacking and the label carries no "\n".
    with open(txt_file) as fp:
        lines = [line.strip() for line in fp if line.strip()]
    if len(lines) != 2:
        raise ValueError(
            "{}: expected a label line and a coordinate line, got {} "
            "non-empty line(s)".format(txt_file, len(lines))
        )
    label, text_str = lines

    text_list = text_str.split()
    if len(text_list) != len(coords_list):
        # zip() would otherwise silently drop the missing/extra values.
        raise ValueError(
            "{}: expected {} coordinates, got {}".format(
                txt_file, len(coords_list), len(text_list)
            )
        )

    # Build the XML tree.
    root = ET.Element("root")
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label
    cordinates = ET.SubElement(root, "cordinates")
    for key, value in zip(coords_list, text_list):
        ET.SubElement(cordinates, key).text = value

    # Write to the xml file.
    ET.ElementTree(root).write(xml_file)
然后您可以将函数调用为write_xml('file.txt', 'xyz.xml')
现在,您可以在循环中对所有txt文件调用该函数
# Placeholders: fill both lists with paths, in matching order, e.g.
# ['labels/abc.txt', ...] and ['xml/abc.xml', ...].
txt_file_paths = []
xml_file_paths = []

# Walk the two lists in lockstep. zip() stops at the shorter list, so keep
# them the same length.
for txt_path, xml_path in zip(txt_file_paths, xml_file_paths):
    write_xml(txt_path, xml_path)
您可以使用os.listdir列出txt文件所在文件夹中的所有文件来填充txt_file_paths,然后使用os.path.join创建文件路径并将其添加到列表中
import os
txt_files_folder = '<folder_with_txt_files>'

# Full paths of the .txt files in the folder. Other entries (sub-directories,
# previously generated .xml files, ...) are skipped, and the listing is sorted
# because os.listdir() returns entries in arbitrary order.
txt_file_names = [
    os.path.join(txt_files_folder, name)
    for name in sorted(os.listdir(txt_files_folder))
    if name.lower().endswith('.txt')
]

# Same list under the name the conversion loop expects.
txt_file_paths = txt_file_names
答案 2 :(得分:0)
# xml.etree.cElementTree was removed in Python 3.9; use ElementTree instead.
import os
import xml.etree.ElementTree as ET


def toxml(lines, save_filepath):
    """Write one annotation (label line + coordinate line) as an XML file.

    Args:
        lines: Lines read from an annotation file. After blank lines are
            dropped, exactly two must remain: the label and a line holding
            four whitespace-separated coordinates.
        save_filepath: Path of the XML file to write.

    Invalid input is reported with print() and no file is written.
    """

    def generate_xml(obj, cordinates_arr, save_filepath):
        """Build the XML tree for one label and its coordinates, then save it."""
        root_node = ET.Element("root")
        object_node = ET.SubElement(root_node, "object")
        ET.SubElement(object_node, "label").text = obj
        cordinates_node = ET.SubElement(root_node, "cordinates")
        # NOTE(review): the tag sequence xmin/ymin/xmin/xmax reproduces the
        # expected output shown in the question; xmin/ymin/xmax/ymax was
        # probably intended.
        for tag, value in zip(("xmin", "ymin", "xmin", "xmax"), cordinates_arr):
            ET.SubElement(cordinates_node, tag).text = value
        ET.ElementTree(root_node).write(save_filepath)

    # Ignore blank lines (e.g. a trailing newline at the end of the file).
    content = [line.strip() for line in lines if line.strip()]
    if len(content) != 2:
        print("Invalid content: {}".format(lines))
        return

    obj = content[0]
    # Split into the individual values; indexing the raw string would yield
    # single characters instead of coordinates.
    cordinates = content[1].split()
    if len(cordinates) != 4:
        print("Invalid line format: {}".format(lines))
        return

    # start generate
    generate_xml(obj, cordinates, save_filepath)
def entry(target_dir_path, save_dri_path):
    """Convert every file in a directory to an XML file in another directory.

    Each file in ``target_dir_path`` is read and passed to ``toxml``; the
    result is saved in the save directory under the same base name with an
    ``.xml`` extension. A failure on one file is printed and does not stop
    the remaining files.

    Args:
        target_dir_path: Directory containing the annotation files.
        save_dri_path: Directory that receives the XML files. The misspelled
            parameter name is kept so existing keyword callers keep working.

    Raises:
        AssertionError: If either directory does not exist.
    """
    # Bind the argument to the correctly spelled name used below; previously
    # the body read a global `save_dir_path` and ignored the argument.
    save_dir_path = save_dri_path

    assert os.path.exists(target_dir_path), \
        "Target directory is not exist: {}".format(target_dir_path)
    # Report the save directory here (the message used to show the target).
    assert os.path.exists(save_dir_path), \
        "Save directory is not exist: {}".format(save_dir_path)

    for filename in os.listdir(target_dir_path):
        file_full_path = os.path.join(target_dir_path, filename)
        filename_prefix, _ = os.path.splitext(filename)
        save_path = os.path.join(save_dir_path, "{}.xml".format(filename_prefix))
        try:
            with open(file_full_path) as ff:
                toxml(ff.readlines(), save_path)
        except Exception as ex:
            # Best effort: report the failing file and continue with the rest.
            print("Generate {0} failed, with error msg: {1}.".format(filename, str(ex)))
if __name__ == "__main__":
    # Placeholder locations: replace with the real input and output folders.
    save_dir_path = '/path/to/you/wanna/save'
    target_dir_path = '/path/to/you/wanna/convert'
    entry(target_dir_path, save_dir_path)
代码已经过测试,希望对您有所帮助。