我应该从.txt文件中获取某些信息并输出它。这是我需要的信息:
DATA看起来像:
Alabama
AL
4802982
Alaska
AK
721523
Arizona
AZ
6412700
Arkansas
AR
2926229
California
CA
37341989
这是我的代码,它并没有真正做我需要做的任何事情:
def main(filename='StateCensus2010.txt'):
    """Print each state's name, abbreviation and population from a census file.

    The file is read as consecutive three-line records: state name, state
    abbreviation, population.

    Args:
        filename: Path of the census file. Defaults to 'StateCensus2010.txt'.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a population line is missing or is not an integer.
    """
    # The context manager closes the file even when a record fails to parse.
    with open(filename, 'r') as census_file:
        while True:
            state_name = census_file.readline()
            if state_name == '':
                # readline() returns '' only at end of file.
                break
            state_name = state_name.rstrip('\n')
            if not state_name.strip():
                # Skip blank lines (e.g. a trailing empty line) instead of
                # treating them as the start of a new record.
                continue
            state_abv = census_file.readline().rstrip('\n')
            population = int(census_file.readline())

            print('State Name: ', state_name)
            print('State Abv.: ', state_abv)
            print('Population: ', population)
            print()


if __name__ == '__main__':
    main()
我目前只做到了读取州名、缩写(abv),并把人口转换为int。我不需要它做任何事情,但是我不确定如何实现任务要求的功能。任何提示我都会非常感激!过去几个小时我一直在尝试各种办法,但都没有成功。
更新
这是我更新的代码,但是我收到了以下错误:
Traceback (most recent call last):
File "main.py", line 13, in <module>
if population > max_population:
TypeError: unorderable types: str() > int()
代码:
def find_max_population(filename='StateCensus2010.txt'):
    """Return the largest population found in a census file.

    The file is expected to hold three-line records: state name, state
    abbreviation, population. Blank lines are ignored.

    Args:
        filename: Path of the census file. Defaults to 'StateCensus2010.txt'.

    Returns:
        The largest population as an int, or 0 when the file has no records.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a population line is not an integer.
    """
    with open(filename, 'r') as census_file:
        # Drop blank lines so a trailing newline does not shift the records.
        lines = [line.strip() for line in census_file if line.strip()]

    # Records are (name, abbreviation, population) triples, so every third
    # line starting at index 2 is a population. Converting to int here keeps
    # the comparison numeric instead of comparing str with int.
    populations = [int(text) for text in lines[2::3]]
    return max(populations, default=0)


if __name__ == '__main__':
    print(find_max_population())
答案 0 :(得分:3)
由于数据的顺序是固定的:州名、州缩写、人口,所以你只需要把这些行读取一次,并显示全部三项信息。以下是示例代码。
def parse_census(text):
    """Split census text into (state name, abbreviation, population) tuples.

    Args:
        text: Full contents of the census file; each state occupies three
            consecutive lines (name, abbreviation, population).

    Returns:
        A list of (str, str, int) tuples, one per complete record.

    Raises:
        ValueError: If a population line is not an integer.
    """
    # Dropping blank lines keeps a trailing newline from being counted as
    # part of a record.
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    # Ignore an incomplete trailing record rather than indexing past the end.
    usable = len(lines) - len(lines) % 3
    return [
        (lines[i], lines[i + 1], int(lines[i + 2]))
        for i in range(0, usable, 3)
    ]


def report_census(filename='StateCensus2010.txt'):
    """Print every state record, then summary statistics for the file.

    After the per-state listing this prints the average population, the
    names of the least and most populous states, and the population of
    Texas (0 when Texas is not in the file).

    Args:
        filename: Path of the census file. Defaults to 'StateCensus2010.txt'.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a population line is not an integer.
    """
    with open(filename, 'r') as census_file:
        records = parse_census(census_file.read())

    total = 0.0
    state_min = None
    state_max = None
    statename_min = ''
    statename_max = ''
    texas_population = 0

    for statename, state_abv, population in records:
        print('Statename : ', statename)
        print('State Abv : ', state_abv)
        print('Population: ', population)
        print()

        total += population
        # None marks "nothing seen yet", so no magic bounds are needed.
        if state_max is None or population > state_max:
            state_max = population
            statename_max = statename
        if state_min is None or population < state_min:
            state_min = population
            statename_min = statename
        if statename == 'Texas':
            texas_population = population

    # Divide by the number of parsed records; guard against an empty file.
    average = total / len(records) if records else 0.0
    print(str(average))
    print('Lowest population state :', statename_min)
    print('Highest population state :', statename_max)
    print('Texas population :', texas_population)


if __name__ == '__main__':
    report_census()
答案 1 :(得分:1)
使用 pandas,这个问题非常简单。
**代码:**
import pandas as pd


def load_states(data):
    """Build a DataFrame of states from an iterable of census lines.

    Args:
        data: Iterable of text lines (an open file, a StringIO, a list, ...)
            laid out as repeating name / abbreviation / population triples.

    Returns:
        A DataFrame with one row per complete record and the columns
        'abbrev', 'pop' and 'state'.

    Raises:
        ValueError: If a population line is not an integer.
    """
    # Strip surrounding whitespace and drop blank lines up front.
    stripped = (line.strip() for line in data)
    lines = (line for line in stripped if line)
    # zip() over three references to the same generator consumes it three
    # lines at a time and simply stops at an incomplete trailing record.
    states = [
        dict(state=name, abbrev=abbrev, pop=int(pop))
        for name, abbrev, pop in zip(lines, lines, lines)
    ]
    # A fixed column order keeps the printed table independent of how the
    # installed pandas version orders dict keys.
    return pd.DataFrame(states, columns=['abbrev', 'pop', 'state'])


def print_report(df):
    """Print the table plus max, min, average and Arizona population.

    Args:
        df: DataFrame as returned by load_states(); must be non-empty.
    """
    print(df)
    # .loc is used for label lookup; the .ix indexer was removed in pandas 1.0.
    print('\nmax population:\n', df.loc[df['pop'].idxmax()])
    print('\nmin population:\n', df.loc[df['pop'].idxmin()])
    print('\navg population:\n', df['pop'].mean())
    print('\nAZ population:\n', df[df.abbrev == 'AZ'])


if __name__ == '__main__':
    # `data` is the line source prepared separately (see the test data).
    print_report(load_states(data))
测试数据:
from io import StringIO
# Sample census records: name, abbreviation and population on consecutive
# lines. The literal starts with a newline, so the first (empty) row is
# dropped after splitting.
_SAMPLE_ROWS = """
Alabama
AL
4802982
Alaska
AK
721523
Arizona
AZ
6412700
Arkansas
AR
2926229
California
CA
37341989
""".splitlines()[1:]

# File-like object over the cleaned rows, joined without a trailing newline.
data = StringIO(u'\n'.join(row.strip() for row in _SAMPLE_ROWS))
<强>结果:强>
abbrev pop state
0 AL 4802982 Alabama
1 AK 721523 Alaska
2 AZ 6412700 Arizona
3 AR 2926229 Arkansas
4 CA 37341989 California
max population:
abbrev CA
pop 37341989
state California
Name: 4, dtype: object
min population:
abbrev AK
pop 721523
state Alaska
Name: 1, dtype: object
avg population:
10441084.6
AZ population:
abbrev pop state
2 AZ 6412700 Arizona
答案 2 :(得分:0)
请试试下面这段代码。之前的代码不兼容 Python 3,只支持 Python 2.7。
def extract_data(state):
    """Print summary statistics for a mapping of state records.

    Prints, in order: the abbreviation and population of the most populous
    state, the same for the least populous state, the number of states, the
    average population truncated to an int, and the population stored under
    'TX' (None when there is no 'TX' entry).

    Args:
        state: Dict mapping a state abbreviation to a dict that holds the
            keys 'population' and 'state_name'.

    Raises:
        ValueError: If ``state`` is empty.
    """
    if not state:
        raise ValueError('no state data to summarise')

    def population_of(abbrev):
        return state[abbrev].get('population')

    # max()/min() return the first matching key when populations tie.
    highest_state = max(state, key=population_of)
    lowest_state = min(state, key=population_of)
    total_population = sum(population_of(abbrev) for abbrev in state)

    print(highest_state, population_of(highest_state))
    print(lowest_state, population_of(lowest_state))
    print(len(state))
    print(int(total_population / len(state)))
    # Look the entry up once so a missing 'TX' prints None instead of
    # failing on None.get().
    texas = state.get('TX')
    print(texas.get('population') if texas else None)
def main(filename='states.txt'):
    """Read a census file into a dict keyed by state abbreviation.

    The file is read as consecutive three-line records: state name, state
    abbreviation, population. Blank lines between or after records are
    skipped.

    Args:
        filename: Path of the census file. Defaults to 'states.txt'.

    Returns:
        Dict mapping each abbreviation to
        {'population': int, 'state_name': str}. When an abbreviation occurs
        more than once, the last record wins.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a population line is missing or is not an integer.
    """
    state = {}
    # The context manager closes the file even when a record fails to parse.
    with open(filename, 'r') as census_file:
        lines = iter(census_file)
        for raw_name in lines:
            state_name = raw_name.rstrip('\n')
            if not state_name.strip():
                # A blank line is not the start of a record.
                continue
            # next(..., '') turns a truncated record into a ValueError from
            # int('') rather than a bare StopIteration.
            state_abv = next(lines, '').rstrip('\n')
            population = int(next(lines, ''))
            state[state_abv] = {
                'population': population,
                'state_name': state_name,
            }
    return state


if __name__ == '__main__':
    state = main()
    extract_data(state)
答案 3 :(得分:0)
另一个 pandas 解决方案(在解释器中交互运行):
>>> import pandas as pd
>>>
>>> records = [line.strip() for line in open('./your.txt', 'r')]
>>>
>>> df = pd.DataFrame([records[i:i+3] for i in range(0, len(records), 3)],
... columns=['State', 'Code', 'Pop']).dropna()
>>>
>>> df['Pop'] = df['Pop'].astype(int)
>>>
>>> df
State Code Pop
0 Alabama AL 4802982
1 Alaska AK 721523
2 Arizona AZ 6412700
3 Arkansas AR 2926229
4 California CA 37341989
>>>
>>> df.ix[df['Pop'].idxmax()]
State California
Code CA
Pop 37341989
Name: 4, dtype: object
>>>
>>> df.ix[df['Pop'].idxmin()]
State Alaska
Code AK
Pop 721523
Name: 1, dtype: object
>>>
>>> df['Pop'].mean()
10441084.6
>>>
>>> df.ix[df['Code'] == 'AZ' ]
State Code Pop
2 Arizona AZ 6412700