我正在从excel XML文件中读取20000 x 16的表格。我在GitHub上使用此功能,但它的工作原理却很慢。有什么方法可以加快速度吗?
import pandas
from bs4 import BeautifulSoup
source = 'Positions_20171110.xls'
def read_excel_xml(path):
""" Converts Excel XML to a [[[list]]] """
file = open(path).read()
soup = BeautifulSoup(file,'xml')
workbook = []
for sheet in soup.findAll('Worksheet'):
sheet_as_list = []
for row in sheet.findAll('Row'):
row_as_list = []
for cell in row.findAll('Cell'):
row_as_list.append(cell.Data.text)
sheet_as_list.append(row_as_list)
workbook.append(sheet_as_list)
return workbook
data = read_excel_xml(source)
df = pd.DataFrame(data[0][1:],columns = data[0][0])