我在将此词典写入csv时遇到问题。我之前使用的是csvwriter
,但我知道DictWriter
是实现这一目标的方法。我的目标是输出看起来像这样。
标题
a_name|a_abbreviation|moneyline|a_pitcher|h_name|h_abbreviation|moneyline|h_pitcher|t_runs|
行&列
texas rangers|TEX|+123|Mike Minor (L)|Seatle Mariners|SEA|-143|Mike Leake (R)|8.5|
下面的错误告诉我它缺少一个字典的开头字段。如果可以的话,我并不需要......
脚本
from bs4 import BeautifulSoup as bs4
import requests
import json
from lxml import html
from pprint import pprint
import re
import csv
outfile = open("BovadaOdds.csv", 'w',newline='')
fieldnames = ['abbreviation','name','moneyline','total runs','pitcher']
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
def get_data():
url = 'https://sports.bovada.lv//baseball/mlb/game-lines-market-group'
r = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"})
html_bytes = r.text
soup = bs4(html_bytes, 'lxml')
# res = soup.findAll('script') # find all scripts..
pattern = re.compile(r"swc_market_lists\s+=\s+(\{.*?\})")
script = soup.find("script", text=pattern)
return script.text[23:]
test1 = get_data()
data = json.loads(test1)
output = {}
root = data['items'][0]
for game_line in root['itemList']['items']:
# Create a temporary dict to store the data for this gameline
team_data = {}
# Get competitors
competitors = game_line['competitors']
for team in competitors:
team_type = team['type'] # either HOME or AWAY
# Create a new dict to store data for each team
team_data[team_type] = {}
team_data[team_type]['abbreviation'] = team['abbreviation']
team_data[team_type]['name'] = team['description']
# Get MoneyLine and Total Runs
for item in game_line['displayGroups'][0]['itemList']:
for outcome in item['outcomes']:
team_type = outcome['type'] # either A or H
team_type = 'AWAY' if team_type == 'A' else 'HOME'
if item['mainMarketType'] == 'MONEYLINE':
team_data[team_type]['moneyline'] = outcome['price']['american']
elif item['mainMarketType'] == 'TOTAL':
team_data[team_type]['total runs'] = outcome['price']['handicap']
# Get the pitchers
team_data['HOME']['pitcher'] = game_line['opponentAName']
team_data['AWAY']['pitcher'] = game_line['opponentBName']
# For each gameline, add the teamdata we gathered to the output dict
output[game_line['description']] = team_data
pprint(output)
writer.writerow(output)
pprint(输出):
{'Chicago Cubs @ Atlanta Braves': {'AWAY': {'abbreviation': 'CHC',
'moneyline': '-130',
'name': 'Chicago Cubs',
'pitcher': 'Yu Darvish (R)'},
'HOME': {'abbreviation': 'ATL',
'moneyline': '+110',
'name': 'Atlanta Braves',
'pitcher': 'Mike Foltynewicz (R)',
'total runs': '8.5'}},
'Chicago White Sox @ Pittsburgh Pirates': {'AWAY': {'abbreviation': 'CWS',
'moneyline': '+130',
'name': 'Chicago White '
'Sox',
'pitcher': 'Reynaldo '
'Lopez (R)'},
'HOME': {'abbreviation': 'PIT',
'moneyline': '-150',
'name': 'Pittsburgh '
'Pirates',
'pitcher': 'Trevor '
'Williams (R)',
'total runs': '8.0'}},
'Cincinnati Reds @ San Francisco Giants': {'AWAY': {'abbreviation': 'CIN',
'moneyline': '+100',
'name': 'Cincinnati Reds',
'pitcher': 'Tyler Mahle '
'(R)'},
'HOME': {'abbreviation': 'SF',
'moneyline': '-120',
'name': 'San Francisco '
'Giants',
'pitcher': 'Ty Blach (L)',
'total runs': '8.0'}},
'Cleveland Indians @ Detroit Tigers': {'AWAY': {'abbreviation': 'CLE',
'moneyline': '-130',
'name': 'Cleveland Indians',
'pitcher': 'Josh Tomlin (R)'},
'HOME': {'abbreviation': 'DET',
'moneyline': '+110',
'name': 'Detroit Tigers',
'pitcher': 'Francisco Liriano '
'(L)',
'total runs': '9.5'}},
'Colorado Rockies @ San Diego Padres': {'AWAY': {'abbreviation': 'COL',
'moneyline': '-120',
'name': 'Colorado Rockies',
'pitcher': 'German Márquez '
'(R)'},
'HOME': {'abbreviation': 'SD',
'moneyline': '+100',
'name': 'San Diego Padres',
'pitcher': 'Jordan Lyles (R)',
'total runs': '8.0'}},
'Houston Astros @ Los Angeles Angels': {'AWAY': {'abbreviation': 'HOU',
'moneyline': '-180',
'name': 'Houston Astros',
'pitcher': 'Gerrit Cole (R)'},
'HOME': {'abbreviation': 'LAA',
'moneyline': '+155',
'name': 'Los Angeles Angels',
'pitcher': 'Jaime Barria (R)',
'total runs': '7.5'}},
'Los Angeles Dodgers @ Miami Marlins': {'AWAY': {'abbreviation': 'LAD',
'moneyline': '-175',
'name': 'Los Angeles Dodgers',
'pitcher': 'Alex Wood (L)'},
'HOME': {'abbreviation': 'MIA',
'moneyline': '+150',
'name': 'Miami Marlins',
'pitcher': 'Wei-Yin Chen (L)',
'total runs': '8.0'}},
'Milwaukee Brewers @ Arizona Diamondbacks': {'AWAY': {'abbreviation': 'MIL',
'moneyline': '+142',
'name': 'Milwaukee '
'Brewers',
'pitcher': 'Jhoulys '
'Chacin (R)'},
'HOME': {'abbreviation': 'ARI',
'moneyline': '-164',
'name': 'Arizona '
'Diamondbacks',
'pitcher': 'Zack '
'Greinke (R)',
'total runs': '8.0'}},
'New York Yankees @ Washington Nationals': {'AWAY': {'abbreviation': 'NYY',
'moneyline': '-120',
'name': 'New York '
'Yankees',
'pitcher': 'Masahiro '
'Tanaka (R)'},
'HOME': {'abbreviation': 'WAS',
'moneyline': '+100',
'name': 'Washington '
'Nationals',
'pitcher': 'Gio Gonzalez '
'(L)',
'total runs': '8.5'}},
'Oakland Athletics @ Boston Red Sox': {'AWAY': {'abbreviation': 'OAK',
'moneyline': '+160',
'name': 'Oakland Athletics',
'pitcher': 'Daniel Mengden '
'(R)'},
'HOME': {'abbreviation': 'BOS',
'moneyline': '-185',
'name': 'Boston Red Sox',
'pitcher': 'Eduardo Rodriguez '
'(L)',
'total runs': '9.5'}},
'Philadelphia Phillies @ Baltimore Orioles': {'AWAY': {'abbreviation': 'PHI',
'moneyline': '-125',
'name': 'Philadelphia '
'Phillies',
'pitcher': 'Nick '
'Pivetta '
'(R)'},
'HOME': {'abbreviation': 'BAL',
'moneyline': '+105',
'name': 'Baltimore '
'Orioles',
'pitcher': 'Andrew '
'Cashner '
'(R)',
'total runs': '9.5'}},
'Seattle Mariners @ Minnesota Twins': {'AWAY': {'abbreviation': 'SEA',
'name': 'Seattle Mariners',
'pitcher': 'Wade LeBlanc (L)'},
'HOME': {'abbreviation': 'MIN',
'name': 'Minnesota Twins',
'pitcher': 'Jake Odorizzi '
'(R)'}},
'St. Louis Cardinals @ Minnesota Twins': {'AWAY': {'abbreviation': 'STL',
'moneyline': '+102',
'name': 'St. Louis '
'Cardinals',
'pitcher': 'Jack Flaherty '
'(R)'},
'HOME': {'abbreviation': 'MIN',
'moneyline': '-122',
'name': 'Minnesota Twins',
'pitcher': 'Jose Berrios '
'(R)',
'total runs': '9.0'}},
'Tampa Bay Rays @ Kansas City Royals': {'AWAY': {'abbreviation': 'TB',
'moneyline': '-385',
'name': 'Tampa Bay Rays',
'pitcher': 'Ryan Yarbrough '
'(L)'},
'HOME': {'abbreviation': 'KC',
'moneyline': '+270',
'name': 'Kansas City Royals',
'pitcher': 'Eric Skoglund '
'(L)',
'total runs': '4.5'}},
'Texas Rangers @ Seattle Mariners': {'AWAY': {'abbreviation': 'TEX',
'moneyline': '+123',
'name': 'Texas Rangers',
'pitcher': 'Mike Minor (L)'},
'HOME': {'abbreviation': 'SEA',
'moneyline': '-143',
'name': 'Seattle Mariners',
'pitcher': 'Mike Leake (R)',
'total runs': '8.5'}},
'Toronto Blue Jays @ New York Mets': {'AWAY': {'abbreviation': 'TOR',
'moneyline': '+155',
'name': 'Toronto Blue Jays',
'pitcher': 'Jaime Garcia (L)'},
'HOME': {'abbreviation': 'NYM',
'moneyline': '-180',
'name': 'New York Mets',
'pitcher': 'Noah Syndergaard '
'(R)',
'total runs': '7.5'}}}
错误:
Traceback (most recent call last):
File "C:/Users/mike/Desktop/BOVADA_Output.py", line 68, in <module>
writer.writerow(output)
File "C:\Program Files\Python36\lib\csv.py", line 155, in writerow
return self.writer.writerow(self._dict_to_list(rowdict))
File "C:\Program Files\Python36\lib\csv.py", line 151, in _dict_to_list
+ ", ".join([repr(x) for x in wrong_fields]))
ValueError: dict contains fields not in fieldnames: 'Seattle Mariners @ Minnesota Twins', 'Houston Astros @ Los Angeles Angels', 'Cincinnati Reds @ San Francisco Giants', 'Chicago White Sox @ Pittsburgh Pirates', 'New York Yankees @ Washington Nationals', 'Toronto Blue Jays @ New York Mets', 'Philadelphia Phillies @ Baltimore Orioles', 'Los Angeles Dodgers @ Miami Marlins', 'Oakland Athletics @ Boston Red Sox', 'St. Louis Cardinals @ Minnesota Twins', 'Chicago Cubs @ Atlanta Braves', 'Milwaukee Brewers @ Arizona Diamondbacks', 'Texas Rangers @ Seattle Mariners', 'Colorado Rockies @ San Diego Padres', 'Cleveland Indians @ Detroit Tigers', 'Tampa Bay Rays @ Kansas City Royals'
答案 0 :(得分:1)
我找不到使用DictWriter
的解决方案,因为您发布了ValueError
fieldnames
。
然而,csv.writer()
似乎有可能writer = csv.writer(outfile) # changed csv.DictWriter to a csv.writer
# ... everything else unchanged ...
d = output # unnecessary use of a shorter dictionary name to use in below for-loop
for k, v in d.items():
entry = [d[k]['AWAY']['name'], d[k]['AWAY']['abbreviation'], d[k]['AWAY']['moneyline'], d[k]['AWAY']['pitcher'], \
d[k]['AWAY']['name'], d[k]['HOME']['abbreviation'], d[k]['HOME']['moneyline'], d[k]['HOME']['pitcher'], \
d[k]['HOME']['total runs']]
writer.writerow(entry)
生成一个包含内容的csv
文件: