
时间:2018-08-03 21:21:08

标签: python xml python-3.x pandas os.walk


    |   |   MetaFileInfo.txt
    |   |   
    |   \---1
    |           00001.xml
    |           00002.xml
    |           00003.xml
    |           00004.xml
    |   |   MetaFileInfo.txt
    |   |   
    |   \---1
    |           00001.xml

import xmltodict
import numpy as np
import pandas as pd
from collections import Counter
import os
import glob
import re #Should I use regex to read the filenames

# Collect the full path of every .xml file below rootDir and write the
# list, one path per line, to the log file.
rootDir = "."
exten = '.xml'
logname = 'myfiles.log'

# Gather the paths in a list and join once, rather than growing a string
# with += inside the loop.
xml_paths = []
for dirpath, dirnames, files in os.walk(rootDir):
    for name in files:
        # Case-insensitive match so ".XML" files are picked up as well.
        if name.lower().endswith(exten):
            xml_paths.append(os.path.join(dirpath, name))

# `results` stays a newline-terminated string, as in the original script.
results = ''.join('%s\n' % path for path in xml_paths)

with open(logname, 'w') as logfile:
    # Restored body: the `with` statement had none, which is a syntax
    # error and left the log file unwritten.
    logfile.write(results)

# Parse the numbered XML files in the current working directory into a
# list of dictionaries, one per file.
# NOTE(review): the original comment said "assume only 5 files in each
# folder", but range(1, 5) yields 1..4, so only 00001.xml-00004.xml are
# read -- confirm the intended file count before changing the range.
doc = []
for i in range(1,5):
    with open('0000{}.xml'.format(i)) as fd:
        # Restored body: the `with` statement had none, which is a syntax
        # error and left `doc` empty.
        doc.append(xmltodict.parse(fd.read()))

#This converts the parsed dictionary above to the 
#required data frame and works well.  

def Dict_toDF (xml_dict):
    """Flatten parsed log documents into one de-duplicated DataFrame.

    Args:
        xml_dict: Iterable of parsed documents (nested dicts). Each one is
            read at ``['logs']['log']['logData']``, which must provide a
            comma-separated ``mnemonicList`` and ``unitList`` plus ``data``,
            either a list of comma-separated rows or a single such row.

    Returns:
        A DataFrame whose columns are the mnemonics of the last document
        processed. For every document it holds the unit row followed by the
        data rows; duplicate rows are dropped. An empty DataFrame is
        returned when ``xml_dict`` contains no documents.
    """
    logData_list = []
    channel_list = None
    for xmlval in xml_dict:
        log_data = xmlval['logs']['log']['logData']
        channel_list = log_data['mnemonicList'].split(",")
        data_rows = log_data['data']
        # A single row may arrive as a bare string instead of a list;
        # wrap it so it is not iterated character by character.
        if isinstance(data_rows, str):
            data_rows = [data_rows]
        # Unit row first, then the data rows. The original built these
        # rows but never added them to the result.
        logData_list.append(log_data['unitList'].split(","))
        logData_list.extend(row.split(",") for row in data_rows)
    if channel_list is None:
        # No documents: nothing to name the columns after.
        return pd.DataFrame()
    return pd.DataFrame(logData_list, columns=channel_list).drop_duplicates()

# Build the combined DataFrame from the parsed documents collected in `doc`.
df = Dict_toDF (doc)

我遇到了这个类似的问题,不过它并没有把子文件夹中的多个文件合并为一个文件来处理。它使用 os.chdir() 进入每个目录并应用某些函数,看起来是一种可行的方法。

# Absolute path of every directory yielded by walking directory_to_check.
directories = [
    os.path.abspath(dirpath)
    for dirpath, _subdirs, _filenames in os.walk(directory_to_check)
]
# Drop the starting directory itself so only its descendants remain.
directories.remove(os.path.abspath(directory_to_check))

# Visit each remaining directory by making it the working directory.
for i in directories:
    os.chdir(i)

0 个答案:
