Python Openpyxl将新行添加到Excel

时间:2015-06-17 13:59:26

标签: python xml excel openpyxl

我正在使用 ElementTree 在 XML 文档中查找某个标签的值,然后想把这个值追加到 Excel 工作表中。我还用了一些正则表达式来查找数据,也想把结果追加到同一张工作表上。此外,我还想把文件名追加到工作表中。下面是我用来追加文件名的代码(至少是我的尝试)。

#!/usr/bin/python
from openpyxl import Workbook
import os, sys
# Directory whose entries are listed and recorded in the workbook.
path = "xmls"
dirs = os.listdir(path)

# Create the workbook once, before the loop, so that every name lands in
# the same sheet instead of only the last one seen.
wb = Workbook()
ws = wb.active

# Print each file/directory name and append it as its own row.
for file in dirs:
    print(file)
    ws.append([file])

# Write the workbook to disk a single time, after all rows were added.
wb.save("sample.xlsx")


下面这段代码使用 ElementTree 从标签中获取值:

from openpyxl import Workbook
import xml.etree.ElementTree as ET
import os

# Directory holding the XML documents to scan.
xml_dir = 'xml'

# Build the workbook once so each file contributes a row to the same sheet.
wb = Workbook()

# grab the active worksheet
ws = wb.active

for filename in os.listdir(xml_dir):
    # os.listdir() returns bare names; join them with the directory so the
    # file is found regardless of the current working directory.
    element_tree = ET.parse(os.path.join(xml_dir, filename))
    root = element_tree.getroot()

    title = root.find(".//title")
    if title is None:
        # No <title> element in this document: nothing to record for it.
        continue

    agreement = title.text
    print(agreement)

    # Rows can also be appended
    ws.append([agreement])

# Save the file
wb.save("sample.xlsx")


这是我用正则表达式查找数据的代码:

import re
import os
import openpyxl

# Regular expressions for the data we want to count.  Raw strings keep the
# regex escapes (\s, \d, \=) from being read as invalid string escapes.
regexPattern1 = r">Data\s+\d*\s*\=*\s*</content>"
regexPattern2 = r">Some Data\s+\d*\s*\=*\s*</content>"
regexPattern3 = r">More Data\s+\d*\s*\=*\s*</content>"
regexPattern4 = r">Data More\s+\d*\s*\=*\s*</content>"
regexPattern5 = r">Some More Data\s+\d*\s*\=*\s*</content>"

# Compiled once at import time, in the order get_values() reports its counts.
_COMPILED_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        regexPattern1,
        regexPattern2,
        regexPattern3,
        regexPattern4,
        regexPattern5,
    )
)


def get_values(filepath):
    """Count, for each pattern, the lines of a file that match it.

    The file is read as UTF-8, one line at a time.  Every line is tested
    against each of the five patterns independently, so a line contributes
    at most one to each pattern's count.

    Args:
        filepath: Path of the text/XML file to scan.

    Returns:
        A tuple of five ints: the number of matching lines for
        regexPattern1 through regexPattern5, in that order.  A pattern
        that never matches yields 0.
    """
    counts = [0] * len(_COMPILED_PATTERNS)

    # The context manager guarantees the file handle is closed.
    with open(filepath, encoding="utf8") as handle:
        for line in handle:
            for index, pattern in enumerate(_COMPILED_PATTERNS):
                if pattern.search(line):
                    counts[index] += 1

    return tuple(counts)

def process_folder(folder, output_xls_path):
    """Write the per-file match counts of a folder's XML files to a workbook.

    Every entry of ``folder`` whose name ends in ".xml" is passed to
    get_values(); the five counts it returns become one row of the sheet,
    below a header row.  The workbook is then saved to ``output_xls_path``.

    Args:
        folder: Directory containing the XML files to scan.
        output_xls_path: Path of the workbook file to write.
    """
    # Match on the extension (not a substring anywhere in the name) and sort
    # so the row order does not depend on the unspecified os.listdir() order.
    xml_paths = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.endswith(".xml")
    ]

    wb = openpyxl.Workbook()
    sheet = wb.active

    # Header in the excel
    sheet.append(("Data1", "Data2", "Data3", "Data4", "Data5"))

    for xml_path in xml_paths:
        # Counts are written as text, as the cells were filled before.
        sheet.append([str(count) for count in get_values(xml_path)])

    wb.save(output_xls_path)


# Script entry point: scan the "xmls" folder and write the counts workbook.
if __name__ == "__main__":
    process_folder(
        "xmls",
        "xml.xlsx",
    )

0 个答案:

没有答案