The code below works fine until I add the "trainer" field to the scrape. That field sits in the second part of the second sibling in the HTML, on line2; every other field comes from line1 of the source. Without the trainer I get the 189 rows I need, but as soon as I include the code that extracts the trainer I only get the last dog in each race (none of the other 5 dogs), which is just 18 rows. For some reason BeautifulSoup isn't playing nicely with the loop, and including the trainer field is what breaks rows.append. Here is the URL http://www.gbgb.org.uk/resultsMeeting.aspx?id=135754 and here is the code:
import csv
from bs4 import BeautifulSoup
import requests
html = requests.get("http://www.gbgb.org.uk/resultsMeeting.aspx?id=135754").text
soup = BeautifulSoup(html,'lxml')
rows = []
for header in soup.find_all("div", class_="resultsBlockHeader"):
    track = header.find("div", class_="track").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    date = header.find("div", class_="date").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    datetime = header.find("div", class_="datetime").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    grade = header.find("div", class_="grade").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    distance = header.find("div", class_="distance").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    prizes = header.find("div", class_="prizes").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line1")
    for result in results:
        fin = result.find("li", class_="fin").get_text(strip=True)
        greyhound = result.find("li", class_="greyhound").get_text(strip=True)
        trap = result.find("li", class_="trap").get_text(strip=True)
        sp = result.find("li", class_="sp").get_text(strip=True)
        timeSec = result.find("li", class_="timeSec").get_text(strip=True)
        timeDistance = result.find("li", class_="timeDistance").get_text(strip=True)
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line2")
    for result in results:
        trainer = result.find("li", class_="trainer").get_text(strip=True)
        rows.append({
            "track": track,
            "date": date,
            "greyhound": greyhound,
            "datetime": datetime,
            "sp": sp,
            "grade": grade,
            "distance": distance,
            "prizes": prizes,
            "timeSec": timeSec,
            "timeDistance": timeDistance,
            "trap": trap,
            "fin": fin,
            "trainer": trainer
        })

with open("greyfile.csv", "w") as f:
    writer = csv.DictWriter(f, ["track","date","trap","fin","greyhound","datetime","sp","grade","distance","prizes","timeSec","timeDistance","trainer"])
    for row in rows:
        writer.writerow(row)
Answer 0 (score: 1)
My best guess is that you have rows.append under the second for loop, so by the time each row is appended the line1 variables only hold the values from the last dog in the race. Code that correctly combines the data from both loops is below.
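To see why only the last dog survives, remember that a name assigned inside a for loop simply keeps its final value once the loop has finished. A minimal sketch of the effect (with made-up values, not the scraped data):

dogs = ["Dog A", "Dog B", "Dog C"]
for dog in dogs:
    greyhound = dog        # reassigned on every pass through the loop
print(greyhound)           # prints "Dog C" - only the last assignment is left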
import csv
from bs4 import BeautifulSoup
import requests
html = requests.get("http://www.gbgb.org.uk/resultsMeeting.aspx?id=135754").text
soup = BeautifulSoup(html,'lxml')
rows = []
for header in soup.find_all("div", class_="resultsBlockHeader"):
    # Race-level fields from the header block.
    track = header.find("div", class_="track").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    date = header.find("div", class_="date").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    datetime = header.find("div", class_="datetime").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    grade = header.find("div", class_="grade").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    distance = header.find("div", class_="distance").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    prizes = header.find("div", class_="prizes").get_text(strip=True).encode('ascii', 'ignore').strip("|")
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line1")
    details = []  # one dict per dog in this race, built from its line1 ul
    for result in results:
        fin = result.find("li", class_="fin").get_text(strip=True)
        greyhound = result.find("li", class_="greyhound").get_text(strip=True)
        trap = result.find("li", class_="trap").get_text(strip=True)
        sp = result.find("li", class_="sp").get_text(strip=True)
        timeSec = result.find("li", class_="timeSec").get_text(strip=True)
        timeDistance = result.find("li", class_="timeDistance").get_text(strip=True)
        details.append({"greyhound": greyhound, "sp": sp, "fin": fin, "timeSec": timeSec, "timeDistance": timeDistance, "trap": trap})
    # The trainer lives in the matching line2 ul, so attach it to the dog at the same index.
    results = header.find_next_sibling("div", class_="resultsBlock").find_all("ul", class_="line2")
    for index, result in enumerate(results):
        trainer = result.find("li", class_="trainer").get_text(strip=True)
        details[index]["trainer"] = trainer
    # Add the shared race-level fields to every dog and collect the finished rows.
    for detail in details:
        detail.update({"track": track, "date": date, "datetime": datetime, "grade": grade, "distance": distance, "prizes": prizes})
        rows.append(detail)
with open("greyfile.csv", "w") as f:
    writer = csv.DictWriter(f, ["track","date","trap","fin","greyhound","datetime","sp","grade","distance","prizes","timeSec","timeDistance","trainer"])
    for row in rows:
        writer.writerow(row)
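If you would rather not track indexes by hand, here is a sketch of a variant that pairs the two lists with zip and also writes a header row to the CSV. It assumes every line1 ul has a matching line2 ul in the same order, and it targets Python 3, so the encode('ascii', 'ignore') step is dropped:

import csv

import requests
from bs4 import BeautifulSoup

FIELDS = ["track", "date", "trap", "fin", "greyhound", "datetime", "sp",
          "grade", "distance", "prizes", "timeSec", "timeDistance", "trainer"]

html = requests.get("http://www.gbgb.org.uk/resultsMeeting.aspx?id=135754").text
soup = BeautifulSoup(html, "lxml")

rows = []
for header in soup.find_all("div", class_="resultsBlockHeader"):
    # Race-level fields, shared by every dog in the race.
    race = {name: header.find("div", class_=name).get_text(strip=True).strip("|")
            for name in ("track", "date", "datetime", "grade", "distance", "prizes")}

    block = header.find_next_sibling("div", class_="resultsBlock")
    line1s = block.find_all("ul", class_="line1")
    line2s = block.find_all("ul", class_="line2")

    # zip() pairs each dog's line1 ul with its line2 ul, so no manual indexing is needed.
    for line1, line2 in zip(line1s, line2s):
        row = dict(race)
        for name in ("fin", "greyhound", "trap", "sp", "timeSec", "timeDistance"):
            row[name] = line1.find("li", class_=name).get_text(strip=True)
        row["trainer"] = line2.find("li", class_="trainer").get_text(strip=True)
        rows.append(row)

with open("greyfile.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, FIELDS)
    writer.writeheader()
    writer.writerows(rows)

The pairing assumption here is the same one the enumerate version makes; if the page ever had a line1 without a matching line2, zip would silently drop that dog instead of raising an IndexError.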