如何创建文件并在其中保存已删除的数据?

时间:2017-11-19 22:13:58

标签: python beautifulsoup screen-scraping

我已经制作了这个脚本,但我尝试了几个保存数据的选项,但是我一直在弄乱代码。如何将提取的数据保存到csv或excel文件中?

import requests
from bs4 import BeautifulSoup

# BUG FIX: the original base_url ended with "page=1", so appending the page
# number produced "page=11", "page=12", ... — pages 1-10 were never fetched
# and the printed counter never matched the page actually requested.
# End the query string with "page=" so the counter is appended correctly.
base_url = "http://www.privredni-imenik.com/pretraga?abcd=&keyword=&cities_id=0&category_id=0&sub_category_id=0&page="
current_page = 1

# Scrape listing pages 1..199 and print each company's title, address and
# contact details. Each ".jobs-item" div holds one company; its first
# ".description" div is the address and the second is the contact info
# (as indexed by the original code — verify against the live markup).
while current_page < 200:
    print(current_page)
    url = base_url + str(current_page)
    r = requests.get(url)
    zute_soup = BeautifulSoup(r.text, 'html.parser')
    firme = zute_soup.findAll('div', {'class': 'jobs-item'})

    for firma in firme:
        title1 = firma.findAll('h6')[0].text
        print(title1)
        # Fetch the description divs once instead of running findAll twice.
        opisi = firma.findAll('div', {'class': 'description'})
        adresa = opisi[0].text
        print(adresa)
        kontakt = opisi[1].text
        print(kontakt)
        print('\n')
    current_page += 1

1 个答案:

答案 0 :(得分:1)

获取CSV的一种简单方法是打印用逗号分隔的每一行,再用操作系统的 ">" 重定向写入文件;不过更稳妥的做法是直接使用 Python 标准库的 csv 模块把数据写入文件,如下所示。

import csv
import requests
from bs4 import BeautifulSoup

# BUG FIX: base_url previously ended with "page=1", so concatenating the page
# counter requested "page=11", "page=12", ... instead of pages 1, 2, ...
# End the query string with "page=" so the counter is appended correctly.
base_url = "http://www.privredni-imenik.com/pretraga?abcd=&keyword=&cities_id=0&category_id=0&sub_category_id=0&page="
current_page = 1


# Write one CSV row per scraped company: page number, title, address, contact.
# newline='' is required by the csv module so rows are not double-spaced on
# Windows; encoding='utf-8' avoids UnicodeEncodeError on Serbian diacritics
# under locale-dependent default codecs.
with open('scrape_results.csv', 'w', newline='', encoding='utf-8') as scrape_results:
    csvwriter = csv.writer(scrape_results)

    while current_page < 200:
        url = base_url + str(current_page)
        r = requests.get(url)
        zute_soup = BeautifulSoup(r.text, 'html.parser')
        firme = zute_soup.findAll('div', {'class': 'jobs-item'})

        for firma in firme:
            title1 = firma.findAll('h6')[0].text
            # Fetch the description divs once instead of running findAll twice;
            # index 0 is the address, index 1 the contact block (per the
            # original code — verify against the live markup).
            opisi = firma.findAll('div', {'class': 'description'})
            adresa = opisi[0].text
            kontakt = opisi[1].text
            csvwriter.writerow([current_page, title1, adresa, kontakt])

        current_page += 1