我对使用Python以Excel格式编写数据不太熟悉,需要一些帮助将我的数据输出写入具有多个标签的单个.xlsx(Excel)文件中。
我的代码在这里给出:
import time
import requests
import random
from lxml import html
from bs4 import BeautifulSoup
import xlsxwriter
def write_to_file(file, mode, data, newline=None, with_tab=None):
with open(file, mode, encoding='utf-8') as l:
if with_tab == True:
data = '\t'.join(data)
if newline == True:
data = data+'\n'
l.write(data)
link = ["http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=®istryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber="+str(var)+"&nextList=Next%C2%A0%3E&selectedPeriods=" for var in range(17500)] # This will read the URL's line by line as per specific value of var.
start = 1
end = 20
for pagenum, links in enumerate(link[start:end]):
print(links)
r = requests.get(links)
time.sleep(random.randint(2,5))
soup = BeautifulSoup(r.content,"lxml")
# Table 2
for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
print(dataset)
write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
write_to_file('Table3.tsv', 'a', links)
# Table 3
for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
print(dataset)
write_to_file('Table3.tsv', 'a', dataset, with_tab=True, newline=True)
write_to_file('Table3.tsv', 'a', links)
#workbook = xlsxwriter.Workbook('Table3.xlsx')
#worksheet = workbook.add_worksheet("Table 3")
#worksheet.write(dataset)
#workbook.close()
我需要.xlsx Excel工作表中多个选项卡的输出,例如表1选项卡和表2选项卡,当前我正在获取.tsv格式的数据。我已经尝试过xlsxwriter,但无法获得结果,所以评论了这些内容。请帮助
答案 0 :(得分:2)
您需要首先创建两个工作表,并跟踪要用于每个工作表的当前行。然后,append_row()
函数可以将一行数据添加到所需的工作表中。
import time
import requests
import random
from lxml import html
from bs4 import BeautifulSoup
import xlsxwriter
def append_row(ws, row):
for col, value in enumerate(row):
ws.write_string(ws.cur_row, col, value)
ws.cur_row += 1
workbook = xlsxwriter.Workbook('output.xlsx')
ws_2 = workbook.add_worksheet("Table 2")
ws_3 = workbook.add_worksheet("Table 3")
# Keep a track of the row to use in each worksheet
ws_2.cur_row = 0
ws_3.cur_row = 0
start = 1
end = 3
link = "http://ec.europa.eu/environment/ets/ohaDetails.do?returnURL=&languageCode=en&accountID=®istryCode=&buttonAction=all&action=&account.registryCode=&accountType=&identifierInReg=&accountHolder=&primaryAuthRep=&installationIdentifier=&installationName=&accountStatus=&permitIdentifier=&complianceStatus=&mainActivityType=-1&searchType=oha&resultList.currentPageNumber={}&nextList=Next%C2%A0%3E&selectedPeriods="
for page_number in range(start, end):
print("Page {}".format(page_number))
url = link.format(page_number)
r = requests.get(url)
time.sleep(random.randint(2, 5))
soup = BeautifulSoup(r.content, "lxml")
# Table 2
for items in soup.find(id="tblAccountContactInfo").find_all("tr")[:]:
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
append_row(ws_2, [url] + dataset])
# Table 3
for items in soup.find(id="tblChildDetails").find("table").find_all("tr"):
dataset = [item.get_text(strip=True) for item in items.find_all("td")[:]]
append_row(ws_3, [url] + dataset])
workbook.close()