我有一种感觉,我犯了一个愚蠢的错误。我试图从网站上抽取一些数据,我可以解析数据,但它不会写入csv。我是Python的初学者,我正在为此而烦恼。
到目前为止,这是我的代码:
import requests
import csv
from bs4 import BeautifulSoup
base_url = "http://www.example.com/"
total_pages = 100
def parse_content(r):
soup = BeautifulSoup(r.content,'lxml')
g_data = soup.find_all('ul', {"class": "searchResults"})
for item in g_data:
for li in item.findAll('li'):
for resultnameh2 in li.findAll('h2'):
for resultname in resultnameh2.findAll('a'):
cname = resultname.text
for resultAddress in li.findAll('p', {"class": "resultAddress"}):
caddress = resultAddress.text.replace('Get directions','').strip()
for resultContact in li.findAll('ul', {"class": "resultContact"}):
for resultContact in li.findAll('a', {"class": "resultMainNumber"}):
ccontact = resultContact.text
for page in range(1, total_pages):
response = requests.get(base_url + '/' + str(page))
if response.status_code != 200:
break
parse_content(response)
csvdata = parse_content(response)
with open('index.csv', 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([csvdata])
答案 0 :(得分:0)
cname
,caddress
,ccontact
的值已在函数parse_content
的上下文中设置,因此在csv的for
循环中不可用作家。
您需要返回这些值或在parse_content
方法中移动csv writer。
答案 1 :(得分:0)
你需要返回值
import requests
import csv
from bs4 import BeautifulSoup
base_url = "http://www.example.com/"
total_pages = 100
def parse_content(r): # you are not returning anything from this function, I will change this function so it will return required values.
cname=[]
caddress=[]
ccontact=[]
soup = BeautifulSoup(r.content,'lxml')
g_data = soup.find_all('ul', {"class": "searchResults"})
for item in g_data:
for li in item.findAll('li'):
for resultnameh2 in li.findAll('h2'):
for resultname in resultnameh2.findAll('a'):
cname.append(resultname.text) # As it is list you need to append all these values
for resultAddress in li.findAll('p', {"class": "resultAddress"}):
caddress.append(resultAddress.text.replace('Get directions','').strip())
for resultContact in li.findAll('ul', {"class": "resultContact"}):
for resultContact in li.findAll('a', {"class": "resultMainNumber"}):
ccontact.append(resultContact.text)
return cname,caddress,ccontact
for page in range(1, total_pages):
response = requests.get(base_url + '/' + str(page))
if response.status_code != 200:
break
parse_content(response)
cname,caddress,ccontact = parse_content(response)
print(cname)
print(caddress)
print(ccontact)
#print whatever you like from above lists
with open('index.csv', 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([csvdata])