#!/usr/bin/python
from __future__ import print_function
import textwrap
import requests
from bs4 import BeautifulSoup
def bbb_spider(max_pages):
    """Crawl BBB category search results and print every listed profile.

    Prompts on stdin for a business category, then requests search result
    pages 1 through ``max_pages`` (inclusive) and hands the ``href`` of each
    result link to ``bbb_profiles``.

    Args:
        max_pages: Number of the last result page to fetch. It is compared
            against the integer page counter, so it should be a number.
    """
    # Fall back to ``input`` on interpreters that have no ``raw_input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    bus_cat = read_line('Enter a business category: ')

    search_url = 'http://www.bbb.org/search/'
    page = 1
    while page <= max_pages:
        # Let requests build the query string so that a category containing
        # spaces, '&' or '#' is percent-encoded instead of corrupting the URL.
        # A list of pairs keeps the parameters in their original order.
        query = [('type', 'category'), ('input', bus_cat), ('page', page)]
        response = requests.get(search_url, params=query)
        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.select("table.search-results-table tr h4 a"):
            href = link.get('href')
            if href:  # an anchor without an href has no profile to fetch
                bbb_profiles(href)
        page += 1
def bbb_profiles(profile_urls):
    """Fetch one BBB profile page and print its name, phone and address.

    Every business name and every phone number is printed on its own line.
    The address fragments are joined with single spaces and printed as one
    newline-terminated line, so the next profile's name starts on a fresh
    line instead of trailing the previous address.

    Args:
        profile_urls: URL of a single profile page (despite the plural name,
            it is passed straight to ``requests.get``).
    """
    response = requests.get(profile_urls)
    soup = BeautifulSoup(response.text, "html.parser")

    for business_name in soup.find_all("h1", {"class": "business-title"}):
        print(business_name.string)
    for business_phone in soup.find_all("span", {"class": "business-phone"}):
        print(business_phone.string)

    # Collect the address fragments first and emit them in a single print
    # call: printing each one with end=" " never terminated the line.
    # Fragments whose ``.string`` is None are skipped so the join cannot fail.
    address_parts = [
        span.string
        for span in soup.select("div.business-detail-text p span.nobr")
        if span.string is not None
    ]
    if address_parts:
        print(" ".join(address_parts))
# Script entry: ask how many result pages to crawl, then start the spider.
# The answer is read as plain text and converted with int(); the Python 2
# built-in ``input`` would evaluate whatever the user typed as an expression.
try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3: input() already returns the raw text
sort_pages = int(_read_line('How many pages to sort through?: '))
bbb_spider(sort_pages)
运行之后,我得到的输出大致如下:
Phone
Address Name of Business
我想要的是:
Name of Business
Phone
Address
基本上,问题出在我的 for 循环里:在输出下一组条目(名称、电话、地址)时,企业名称会和上一组的地址出现在同一行。所以我想做的是让名称另起一行并靠左显示,地址也单独成行并靠左显示,电话保持现状即可。