我有以下代码:
def get_list_of_university_towns():
    """Parse ``university_towns.txt`` into a State/RegionName DataFrame.

    Lines containing ``[edit]`` are treated as state headers; every other
    line is a region belonging to the most recent state header.  In both
    cases the name is the text before the first ``(`` or ``[``, with
    surrounding whitespace removed.

    After parsing, full state names are converted to their two-letter
    abbreviations via ``statesinverse`` (this is the version from the
    question, which is why the ``State`` column shows e.g. ``'AL'``).
    State names that are not in the lookup table are kept unchanged.

    Returns:
        pandas.DataFrame with columns ``State`` and ``RegionName``, one row
        per region, in file order.

    Raises:
        OSError: if ``university_towns.txt`` cannot be opened.
    """
    import re
    import pandas as pd

    # Everything up to (not including) the first '(' or '['.
    name_pattern = re.compile(r'^([^(\[]+)')

    rows = []
    current_state = None  # stays None for regions listed before any header
    with open('university_towns.txt', "r") as f_in:
        for line in f_in:
            match = name_pattern.search(line)
            if match is None:
                # Line starts with '(' or '[': there is no name to extract.
                continue
            name = match.group(1).strip()
            if not name:
                # Blank line: nothing to record.
                continue
            if '[edit]' in line:
                current_state = name
            else:
                rows.append((current_state, name))

    # Build the frame once instead of growing it row by row with .loc.
    dataframe = pd.DataFrame(rows, columns=['State', 'RegionName'])

    states = {
        'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa',
        'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National',
        'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska',
        'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana',
        'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia',
        'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas',
        'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii',
        'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana',
        'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam',
        'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina',
        'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands',
        'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut',
        'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana',
        'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska',
        'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California',
        'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware',
        'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota',
        'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts',
        'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia',
    }
    # Full name -> abbreviation.
    statesinverse = {v: k for k, v in states.items()}

    # One output value per row keeps the list aligned with the frame;
    # skipping unknown names would make the assignment below fail with a
    # length mismatch.
    listed = [statesinverse.get(name, name) for name in dataframe['State']]
    dataframe['State'] = listed
    return dataframe
get_list_of_university_towns()
但是,这将以以下形式打印输出:
State RegionName
0 AL Auburn
1 AL Florence
2 AL Jacksonville
我希望州名显示为完整名称 'Alabama',而不是缩写 'AL'。
也就是说,我希望 “State” 列中显示的是字典 “statesinverse” 的键(州的全称),而不是它的值(缩写)。
有人可以帮我吗?
答案 0 :(得分:3)
您不需要反转字典的键和值。dict.get(<key>) 和 dict[<key>] 一样,会返回给定键对应的值;区别在于,当找不到该键时,get() 不会报错,而是返回 None。请确认您的数据框中传入的是州代码(例如 “AL”):
def get_list_of_university_towns():
    """Parse ``university_towns.txt`` into a State/RegionName DataFrame.

    Lines containing ``[edit]`` are treated as state headers; every other
    line is a region belonging to the most recent state header.  In both
    cases the name is the text before the first ``(`` or ``[``, with
    surrounding whitespace removed.

    The ``State`` column always ends up holding full state names: a value
    that is a two-letter code found in ``states`` (e.g. ``'AL'``) is
    expanded to its full name (``'Alabama'``); any other value, including
    a name that is already spelled out, is kept unchanged.

    Returns:
        pandas.DataFrame with columns ``State`` and ``RegionName``, one row
        per region, in file order.

    Raises:
        OSError: if ``university_towns.txt`` cannot be opened.
    """
    import re
    import pandas as pd

    # Everything up to (not including) the first '(' or '['.
    name_pattern = re.compile(r'^([^(\[]+)')

    rows = []
    current_state = None  # stays None for regions listed before any header
    with open('university_towns.txt', "r") as f_in:
        for line in f_in:
            match = name_pattern.search(line)
            if match is None:
                # Line starts with '(' or '[': there is no name to extract.
                continue
            name = match.group(1).strip()
            if not name:
                # Blank line: nothing to record.
                continue
            if '[edit]' in line:
                current_state = name
            else:
                rows.append((current_state, name))

    # Build the frame once instead of growing it row by row with .loc.
    dataframe = pd.DataFrame(rows, columns=['State', 'RegionName'])

    states = {
        'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa',
        'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National',
        'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska',
        'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana',
        'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia',
        'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas',
        'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii',
        'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana',
        'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam',
        'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina',
        'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands',
        'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut',
        'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana',
        'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska',
        'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California',
        'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware',
        'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota',
        'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts',
        'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia',
    }

    # states.get('AL') returns 'Alabama'; the second argument is the
    # fallback, so values that are not codes (already full names) pass
    # through unchanged and the list stays the same length as the frame.
    listed = [states.get(name, name) for name in dataframe['State']]
    dataframe['State'] = listed
    return dataframe
get_list_of_university_towns()
测试:
# Lookup table keyed by two-letter code, with the full name as the value.
states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}
# dict.get looks the key up directly: 'AL' is a key here, so this evaluates to 'Alabama'.
states.get('AL')
输出:
'Alabama'
答案 1 :(得分:1)
这部分代码把完整的州名替换成了缩写:
# Excerpt from the question's function: this is the step that replaces the
# full state names in the 'State' column with their abbreviations.
listed = []
states = {'OH': 'Ohio', 'KY': 'Kentucky'}  # ... remaining entries omitted for brevity
# Invert the table: full name -> abbreviation.
statesinverse = {v: k for k, v in states.items()}
for i in dataframe['State']:
    if i in statesinverse.keys():
        value = statesinverse.get(i)
        listed.append(value)
# Overwrites the full names with the collected abbreviations.
dataframe['State'] = listed
尝试删除此代码,然后查看输出。