这是我的代码:
# Ward names submitted to the scraping form; each one also names an output CSV file.
wardName = [
    "DHANLAXMICOMPLEX",
    "POTALIYA",
    "ARJUN TOWER",
    "IIM",
]
def get_all_pages():
    """Fetch the result page for every ward in ``wardName``.

    A GET on the form URL supplies the ``csrf_token`` value, which is then
    sent with one POST per ward over the same session.

    Returns:
        A list with the raw response body of each POST, in ``wardName`` order.
    """
    url = 'https://recruitment.advarisk.com/tests/scraping'
    client = requests.session()

    # The token is read from the hidden form input on the landing page.
    landing_page = html.fromstring(client.get(url).content)
    csrf = landing_page.xpath('//input[@name="csrf_token"]/@value')[0]

    # The same headers are sent with every POST, so build them once.
    headers = {
        'referer': url,
        'content-type': 'application/x-www-form-urlencoded',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }

    pages = []
    for ward in wardName:
        response = client.post(
            url,
            data={'csrf_token': csrf, 'ward': ward},
            headers=headers,
        )
        pages.append(response.content)
    return pages
def parse_and_write_to_csv(htmls):
    """Write the results table found in ``htmls`` to one CSV file per ward.

    The header row is the text of the ``th`` cells under the ``thead`` of the
    table with id ``results`` (read with lxml). The data rows are the ``td``
    texts of every ``tr`` matched by ``tr + tr`` inside that table (read with
    BeautifulSoup).

    The single page passed in is written to *every* ``<ward>.csv`` named in
    ``wardName``, so all files produced by one call hold the same header and
    the same rows.

    Args:
        htmls: HTML of one result page.
    """
    header = html.fromstring(htmls).xpath("//table[@id='results']/thead//th//text()")
    results_table = BeautifulSoup(htmls, "html.parser").select_one("#results")

    for ward in wardName:
        with open(str(ward) + '.csv', 'w') as out:
            writer = csv.writer(out)
            writer.writerow(header)
            data_rows = []
            for row in results_table.select("tr + tr"):
                data_rows.append([cell.text for cell in row.find_all("td")])
            writer.writerows(data_rows)
def main():
    """Download every ward page and pass each one to the CSV writer."""
    pages = get_all_pages()
    for page in pages:
        parse_and_write_to_csv(page)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
但正如您所见,所有CSV文件都包含上一个IIM页面的相同内容。我想让每个CSV文件根据其名称获取内容。我该怎么做才能获得正确的CSV?哪里出错了?
答案 0 :(得分:0)
在 parse_and_write_to_csv() 内,for i in wardName 循环中用到的两个值(th 和 table)在各次迭代之间从未改变其内容
如果您想拥有不同的CSV内容,则需要将这些行移动到该循环中并更改它们
writer.writerow
一个建议是将wardName添加到其结果
# `parse` is the lxml tree of the page (built with html.fromstring in the
# question's function); collect the header-cell texts of the "results" table.
th = parse.xpath("//table[@id='results']/thead//th//text()")
# Parse the same page with BeautifulSoup and pick the element with id "results".
soup = BeautifulSoup(htmls, "html.parser")
table = soup.select_one("#results")
循环遍历那些
# Store the ward name together with its response body, so each page stays
# tied to the ward it was requested for (i and r are the loop variables of
# the question's get_all_pages).
list.append((i, r.content))
重新定义你的函数,不要再次遍历 wardName
# Each item is a (ward, page) pair, so every page is written to its own file.
# The function defined in the question is get_all_pages (plural).
for ward, page in get_all_pages():
    write_to_csv(ward, page)
另一个建议是删除全局列表。
def write_to_csv(ward, page):
    """Write the results table of one ward's page to ``<ward>.csv``.

    The header row is the text of the ``th`` cells under the ``thead`` of the
    table with id ``results``; the data rows are the ``td`` texts of every
    ``tr`` matched by ``tr + tr`` inside that table (the same selection the
    question's code uses).

    Args:
        ward: Ward name; used as the stem of the output file name.
        page: HTML of the result page returned for that ward.
    """
    import csv  # local import so the snippet is usable on its own

    parse = html.fromstring(page)
    th = parse.xpath("//table[@id='results']/thead//th//text()")
    soup = BeautifulSoup(page, "html.parser")
    table = soup.select_one("#results")
    rows = [
        [cell.text for cell in row.find_all("td")]
        for row in table.select("tr + tr")
    ]
    with open(ward + '.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerow(th)
        writer.writerows(rows)