如何使用python将.txt文件转换为.xml文件

时间:2019-05-06 03:20:26

标签: python xml

我的abc.txt文件如下:

1
76 45 146 87

这是我在xyz.xml文件中显示的预期输出:

-<root>
   -<object>
       - <label>1</label>
     </object>
   -<cordinates>
        <xmin>76</xmin>
        <ymin>45</ymin>
        <xmin>146</xmin>
        <xmax>87</xmax>
    </cordinates>
 </root>

这是我尝试过的源代码:

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
import xml.etree.ElementTree as ET

# Hand-written version: the label and the four coordinates are hard-coded
# here instead of being read from abc.txt.
root = ET.Element("root")

# Named object_node rather than "object" so the builtin is not shadowed.
object_node = ET.SubElement(root, "object")
ET.SubElement(object_node, "label").text = "1"

cordinates = ET.SubElement(root, "cordinates")
# Tag names and their order are kept exactly as written in the attempt
# (xmin, ymin, xmin, xmax).
for tag, value in (("xmin", "76"), ("ymin", "45"), ("xmin", "146"), ("xmax", "87")):
    ET.SubElement(cordinates, tag).text = value

# Written to the current working directory, without an XML declaration.
tree = ET.ElementTree(root)
tree.write("xyz.xml")

但问题是,上面的标签和坐标都是我手动写进代码里的。我希望程序能够读取abc.txt文件,并自动处理350多个.txt文件。有人可以帮我重写代码吗?任何帮助我都将不胜感激。谢谢!

3 个答案:

答案 0 :(得分:0)

此代码会读取对象名称和坐标并存入字典,然后遍历该字典来创建相应的元素。

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
import xml.etree.ElementTree as ET

file_list = ['abc.txt', 'def.txt']


def read_annotation(path):
    """Read one annotation file and return ``(label, coords)``.

    The first line of the file is the object label and the second line holds
    four whitespace-separated coordinates.

    Args:
        path: path of the annotation text file.

    Returns:
        A tuple of the label string and a list of four coordinate strings.

    Raises:
        ValueError: if the label is empty or the second line does not hold
            exactly four values.
    """
    # The context manager closes the file even when parsing fails.
    with open(path, "r") as f:
        # strip() keeps the trailing newline out of the <label> text.
        label = f.readline().strip()
        coords = f.readline().split()
    if not label or len(coords) != 4:
        raise ValueError(
            "expected a label line followed by 4 coordinates, got {!r} and {!r}"
            .format(label, coords)
        )
    return label, coords


def build_root(obj_coord):
    """Build the ``<root>`` element from a ``{path: (label, coords)}`` dict.

    One ``<object>`` and one ``<cordinates>`` element are appended to the
    root for every entry, in the dict's iteration order.
    """
    # Tag names and their order are kept exactly as the question requests.
    coord_tags = ("xmin", "ymin", "xmin", "xmax")
    root = ET.Element("root")
    for label, coords in obj_coord.values():
        object_node = ET.SubElement(root, "object")
        ET.SubElement(object_node, "label").text = label
        cordinates = ET.SubElement(root, "cordinates")
        for tag, value in zip(coord_tags, coords):
            ET.SubElement(cordinates, tag).text = value
    return root


def main(paths=None, out_path="xyz.xml"):
    """Convert the annotation files in *paths* into a single XML file.

    Args:
        paths: iterable of annotation file paths; defaults to ``file_list``.
        out_path: path of the XML file to write.
    """
    if paths is None:
        paths = file_list

    # Keyed by file path rather than by label: several files may carry the
    # same label and must not overwrite each other.
    obj_coord = {}
    for path in paths:
        try:
            obj_coord[path] = read_annotation(path)
        except ValueError as err:
            # A malformed file is reported and skipped; the remaining files
            # are still converted.
            print("Skipping {}: {}".format(path, err))

    tree = ET.ElementTree(build_root(obj_coord))
    tree.write(out_path)


if __name__ == "__main__":
    main()

答案 1 :(得分:0)

您可以先创建一个函数,它接收txt_file和xml_file两个文件名作为输入,然后将txt_file的内容写入xml_file,如下所示:

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
import xml.etree.ElementTree as ET


def write_xml(txt_file, xml_file):
    """Convert one annotation text file into an XML file.

    The text file must contain two non-blank lines: the object label, then
    four whitespace-separated coordinates in the order xmin, ymin, xmax, ymax.

    Args:
        txt_file: path of the annotation file to read.
        xml_file: path of the XML file to write.

    Raises:
        ValueError: if the file does not have exactly two non-blank lines, or
            the second line does not hold exactly four values. No XML file is
            written in that case.
    """
    # Names of the coordinates, in the order they appear in the text file.
    coords_list = ['xmin', 'ymin', 'xmax', 'ymax']

    with open(txt_file) as fp:
        # Blank lines (such as an extra newline at the end of the file) are
        # dropped, and strip() keeps the line break out of the <label> text.
        lines = [line.strip() for line in fp if line.strip()]

    if len(lines) != 2:
        raise ValueError(
            "{}: expected 2 non-blank lines, found {}".format(txt_file, len(lines))
        )
    label, text_str = lines

    text_list = text_str.split()
    # Checked explicitly because zip() would silently truncate otherwise.
    if len(text_list) != len(coords_list):
        raise ValueError(
            "{}: expected {} coordinates, found {}".format(
                txt_file, len(coords_list), len(text_list)
            )
        )

    # Build the XML tree.
    root = ET.Element("root")
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label

    cordinates = ET.SubElement(root, "cordinates")
    for key, value in zip(coords_list, text_list):
        ET.SubElement(cordinates, key).text = value

    # Write to the xml file.
    tree = ET.ElementTree(root)
    tree.write(xml_file)

然后您可以这样调用该函数:write_xml('file.txt', 'xyz.xml')

现在,您可以在循环中对所有txt文件调用该函数

# Placeholders: fill in the input .txt paths and, in the same order, the
# .xml path each one should be written to.
txt_file_paths = [...]
xml_file_paths = [...]

# zip pairs the two lists element by element, so no index is needed.
for txt_path, xml_path in zip(txt_file_paths, xml_file_paths):
    write_xml(txt_path, xml_path)

您可以使用os.listdir列出txt文件所在文件夹中的所有文件,再用os.path.join拼出每个文件的完整路径并添加到列表中,以此来填充txt_file_paths

import os

# Placeholder: replace with the folder that holds the .txt files.
txt_files_folder = '<folder_with_txt_files>'

# Full paths of the .txt files in the folder. Other directory entries are
# left out, and the names are sorted because os.listdir returns them in
# arbitrary order.
txt_file_paths = [
    os.path.join(txt_files_folder, name)
    for name in sorted(os.listdir(txt_files_folder))
    if name.lower().endswith('.txt')
]

# Matching output paths: same folder and base name, with an .xml extension.
xml_file_paths = [os.path.splitext(path)[0] + '.xml' for path in txt_file_paths]

# Former name of the list, kept as an alias.
txt_file_names = txt_file_paths

答案 2 :(得分:0)

import os
# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
import xml.etree.ElementTree as ET


def toxml(lines, save_filepath):
    """Write the annotation held in *lines* to *save_filepath* as XML.

    Args:
        lines: lines of one annotation file -- the object label followed by
            a line of four whitespace-separated coordinates. Blank lines are
            ignored.
        save_filepath: path of the XML file to create.

    Raises:
        ValueError: if *lines* does not consist of exactly one label line and
            one line with four coordinates. Nothing is written in that case.
    """
    def generate_xml(obj, cordinates_arr, save_filepath):
        # Build <root><object><label/></object><cordinates/></root> and save it.
        root_node = ET.Element("root")
        object_node = ET.SubElement(root_node, "object")
        ET.SubElement(object_node, "label").text = obj
        cordinates_node = ET.SubElement(root_node, "cordinates")
        # Tag names and their order are kept exactly as the question requests.
        for tag, value in zip(("xmin", "ymin", "xmin", "xmax"), cordinates_arr):
            ET.SubElement(cordinates_node, tag).text = value
        tree = ET.ElementTree(root_node)
        tree.write(save_filepath)

    # Dropping blank lines tolerates extra newlines at the end of the file
    # and guarantees that the label is non-empty.
    content = [line.strip() for line in lines if line.strip()]
    if len(content) != 2:
        raise ValueError("Invalid content: {}".format(lines))

    obj, cordinates = content
    # The coordinates must be split into a list: indexing the raw string
    # would yield single characters instead of whole numbers.
    cordinates_arr = cordinates.split()
    if len(cordinates_arr) != 4:
        raise ValueError("Invalid line format: {}".format(lines))

    # start generate
    generate_xml(obj, cordinates_arr, save_filepath)

def entry(target_dir_path, save_dri_path):
    """Convert every ``.txt`` file in *target_dir_path* into an ``.xml`` file.

    Each output file is written to the save directory under the input file's
    base name. A file that fails to convert is reported on stdout and the
    remaining files are still processed.

    Args:
        target_dir_path: directory containing the annotation ``.txt`` files.
        save_dri_path: directory that receives the ``.xml`` files. The
            spelling of the parameter name is kept for compatibility with
            existing callers.

    Raises:
        AssertionError: if either directory does not exist.
    """
    # Work with the argument itself, not a module-level variable of a
    # similar name.
    save_dir_path = save_dri_path

    # Raised explicitly instead of using ``assert`` so the checks are not
    # stripped when Python runs with -O.
    if not os.path.exists(target_dir_path):
        raise AssertionError("Target directory is not exist: {}".format(target_dir_path))
    if not os.path.exists(save_dir_path):
        raise AssertionError("Save directory is not exist: {}".format(save_dir_path))

    # Sorted because os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(target_dir_path)):
        file_full_path = os.path.join(target_dir_path, filename)
        filename_prefix, extension = os.path.splitext(filename)
        # Only regular .txt files are annotations; subdirectories and other
        # files (for example previously generated .xml files) are skipped.
        if extension.lower() != ".txt" or not os.path.isfile(file_full_path):
            continue
        save_path = os.path.join(save_dir_path, "{}.xml".format(filename_prefix))
        try:
            with open(file_full_path) as ff:
                toxml(ff.readlines(), save_path)
        except Exception as ex:
            # Per-file boundary: report the failure and move on to the next
            # file instead of aborting the whole batch.
            print("Generate {0} failed, with error msg: {1}.".format(filename, ex))


if __name__ == '__main__':
    # Placeholder paths -- replace both before running the script.
    # Directory whose files are read and converted.
    target_dir_path = '/path/to/you/wanna/convert'
    # Directory the generated .xml files are written to; entry() checks that
    # it already exists.
    save_dir_path = '/path/to/you/wanna/save'
    entry(target_dir_path, save_dir_path)

代码已经过测试,希望能帮到您。