我已经制作了这个脚本,但我尝试了几个保存数据的选项,但是我一直在弄乱代码。如何将提取的数据保存到csv或excel文件中?
import requests
from bs4 import BeautifulSoup

# Base search URL. The page number is appended below, so the query string
# must end with "page=" — the original ended with "page=1", which made
# base_url + str(current_page) request "page=11", "page=12", ... instead
# of pages 1, 2, ...
base_url = ("http://www.privredni-imenik.com/pretraga"
            "?abcd=&keyword=&cities_id=0&category_id=0"
            "&sub_category_id=0&page=")

current_page = 1
while current_page < 200:
    print(current_page)
    url = base_url + str(current_page)
    r = requests.get(url)
    zute_soup = BeautifulSoup(r.text, 'html.parser')
    # Each company listing lives in a <div class="jobs-item"> element.
    firme = zute_soup.findAll('div', {'class': 'jobs-item'})
    for title in firme:
        title1 = title.findAll('h6')[0].text  # company name
        print(title1)
        # The first "description" div holds the address, the second the
        # contact details (per the selectors used here; confirm on the site).
        adresa = title.findAll('div', {'class': 'description'})[0].text
        print(adresa)
        kontakt = title.findAll('div', {'class': 'description'})[1].text
        print(kontakt)
        print('\n')
        # NOTE(review): page_line is assembled but never written anywhere,
        # which is why no data gets saved — persist it (e.g. via the csv
        # module) to actually keep the scraped results.
        page_line = "{title1}\n{adresa}\n{kontakt}".format(
            title1=title1,
            adresa=adresa,
            kontakt=kontakt
        )
    current_page += 1
答案 0 :(得分:1)
获取CSV的一种简单方法是打印用逗号分隔的每一行，然后使用操作系统的 ">" 重定向写入文件。不过更可靠的做法是使用 csv 模块直接写入文件，如下所示：
import csv
import requests
from bs4 import BeautifulSoup

# Base search URL. The page number is appended below, so the query string
# must end with "page=" — ending with "page=1" (as originally written)
# would request pages 11, 12, ... instead of 1, 2, ...
base_url = ("http://www.privredni-imenik.com/pretraga"
            "?abcd=&keyword=&cities_id=0&category_id=0"
            "&sub_category_id=0&page=")

current_page = 1
# newline='' is required by the csv module so it controls line endings
# itself; utf-8 keeps non-ASCII (Serbian) text intact in the output file.
with open('scrape_results.csv', 'w', newline='', encoding='utf-8') as scrape_results:
    csvwriter = csv.writer(scrape_results)
    csvwriter.writerow(['page', 'name', 'address', 'contact'])  # header row
    while current_page < 200:
        url = base_url + str(current_page)
        # timeout prevents the scrape from hanging forever on a dead server
        r = requests.get(url, timeout=30)
        zute_soup = BeautifulSoup(r.text, 'html.parser')
        # Each company listing lives in a <div class="jobs-item"> element.
        firme = zute_soup.findAll('div', {'class': 'jobs-item'})
        for title in firme:
            title1 = title.findAll('h6')[0].text  # company name
            adresa = title.findAll('div', {'class': 'description'})[0].text
            kontakt = title.findAll('div', {'class': 'description'})[1].text
            csvwriter.writerow([current_page, title1, adresa, kontakt])
        current_page += 1