不知道哪里出了问题。这种写入 CSV 文件的方式在我的其他代码中运行良好,但在这里数据不知为何没有写进去:文件会被创建,但内容是空白的。
如果有人能给点提示,我将不胜感激。我是新手,所以解决办法很可能非常简单。
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re
import csv
# Scrape an index page, follow each section link, then each detail link found
# in a section, and append one CSV row per detail page to ``filename``.

filename = "test.csv"
# Header row. The literal was previously split across two physical lines,
# which is a syntax error for a single-quoted string.
headers = "location, country, type, level, deep, vision, water, access, life kind \n"
my_url = "http://www.example.com"

# A detail page must expose this many '.val' cells; they are written, in
# order, as the type, level, deep, vision, water, access and life kind columns.
_INFO_FIELDS = 7


def _fetch_soup(url):
    """Download *url* and return its body parsed with ``html.parser``."""
    # The context manager closes the response even if read() raises.
    with uReq(url) as client:
        return soup(client.read(), "html.parser")


def linkas(href):
    """Return True when *href* contains both "html" and ``my_url``.

    Used as the ``href`` filter for ``findAll``. Plain substring tests replace
    the previous regex searches so that the dots in ``my_url`` are matched
    literally instead of as "any character".
    """
    return bool(href) and "html" in href and my_url in href


# The ``with`` block guarantees the file is flushed and closed on exit.
# Previously the handle was never closed, so buffered rows could remain
# unwritten and leave an apparently empty file.
# NOTE(review): append mode means the header row is written again on every run.
with open(filename, "a") as f:
    f.write(headers)

    index_soup = _fetch_soup(my_url)
    link = [tag.get('href') for tag in index_soup.select('sea > a[href]')]

    for url in link:
        section_soup = _fetch_soup(url)
        detail_urls = [tag.get('href') for tag in section_soup.findAll(href=linkas)]

        for curl in detail_urls:
            detail_soup = _fetch_soup(curl)
            info = detail_soup.select('.val')
            location = detail_soup.select('.last')
            country = detail_soup.select('.cru > a:nth-of-type(3)')

            # select() returns a (possibly empty) list, never None, so check
            # the lengths before indexing instead of comparing against None.
            if len(info) < _INFO_FIELDS or not location or not country:
                print("skipped (missing fields): " + curl)
                continue
            if location[0].string == 'Page 2':
                continue

            locat = location[0].text.strip()
            countri = country[0].text.strip()
            values = [cell.text.strip() for cell in info[:_INFO_FIELDS]]

            # Same console trace as before (country first, then location),
            # except that the seventh line now prints the "water" value
            # rather than repeating the "type" value.
            for value in [countri, locat] + values:
                print(value)

            # Commas inside a value would split the column, so they are
            # replaced with " |" before the fields are joined.
            row = [field.replace(",", " |") for field in [locat, countri] + values]
            f.write(",".join(row) + "\n")