时间:2018-02-22 16:13:17

标签: python

你好我现在正试图通过对.csv中的数据进行排序,以确定每个月的平均降雪量,然后选择前三个。这是我到目前为止的代码。我认为我可以使用变量对数据进行排序,然后在下一行调用已排序的变量,但这不起作用。相反,我得到一个TypeError:无法转换' list'隐含地反对str。任何指导都非常感谢。 This is the data I'm trying to sort

谢谢。

import os
import csv
data = open('filteredData.csv','r')
# Create Dictionaries to store location values
# Snow_Fall is the number of inches for that location
# Number_Days is the number of days that there is Snowfall data for
Snow_Fall = {}
Number_Days = {}

# Create CSV reader
csv1 = csv.DictReader(data,delimiter=',')
# read each row of the CSV file and process it
for row in csv1:
    # Check the date column and see if it is in 2017
    if "2017" in row["DATE"]:
        # Split the date into Month, Day and Year
        Month, Day, Year = row["DATE"].split("/")
        # Check to see if the value in the snow column is null/none if so then skip processing that row
        if (row["SNOW"] is None) or (row["SNOW"] == ""):
            pass
        else:
            # Check to see if the location has been added to the dict if it has then add the data to itself
            # If it has not then just assign the data to the location.
            # Concat the Location and Month together to get a unique location per month
           # print (" The location is ==> " + row["NAME"] + " <<++>> The Month is " + str(Month))
            name_date = row["NAME"] + "_" + str(Month)
            if name_date in Snow_Fall:
                Snow_Fall[name_date] = Snow_Fall[name_date] + float(row["SNOW"])
                Number_Days[name_date] = Number_Days[name_date] + 1

            else:
                Snow_Fall[name_date] = float(row["SNOW"])
                Number_Days[name_date] = 1

# For each location we want to print the data for that location
for location in Snow_Fall:
   # split the location into Name and month
   location_name, location_date = location.split("_")
   #print ("The number of inches for location " + location_name + " in the Month of " + location_date + " is " + str(Snow_Fall[location]))            
   #print ("The number of days of snowfall for location " + " in the Month of " + location_date+ location_name + " is " + str(Number_Days[location]))
   #print ("The average Number of Inches for location " + location_name + " in the Month of " + location_date + " is " + str(Snow_Fall[location] / Number_Days[location]))

# Check if file exists then remove it if it does   
if (os.path.isfile('average2017.csv')):
   os.remove('average2017.csv')
# Open file   
data2 = open('average2017.csv','w')
# Write File headers
data2.write("Location, Month, Average Snow Fall \n")
# Write contents of the file
for location in Snow_Fall:
    # SPlit the location into name and month
    location_name, location_date = location.split("_")
    average = str(Snow_Fall[location] / Number_Days[location])
    newaverage = sorted([average])
    data2.write('"' + location_name + '",' + location_date + "," + newaverage + "\n")
# Close the open files
data2.close()
data.close()

以下信息来自测试

runfile(&#39; U:/CISP253/Final/temp.py' ;, wdir =&#39; U:/ CISP253 / Final&#39;)                                               地点月\ 0 SPARTA 2.8 W,MI US 8
1 SPARTA 2.8 W,MI US 9
2 ROCKFORD,MI US 12
3 ROCKFORD,MI US 11
4 KENTWOOD 1.6 WSW,MI US 2
5 SPARTA 2.8 W,MI US 5
6 SPARTA 2.8 W,MI US 6
7 SPARTA 2.8 W,MI US 7
8 KENTWOOD 1.6 WSW,MI US 5
9 COMSTOCK PARK 3.7 NNE,MI US 12
10 CALEDONIA 4.4 WNW,MI US 9
11 COMSTOCK PARK 3.7 NNE,MI US 10
12 GRAND RAPIDS 4.6 ESE,MI US 12
13 ROCKFORD 0.9 NNE,MI US 5
14 GRAND RAPIDS 4.6 ESE,MI US 10
15 GRAND RAPIDS 4.6 ESE,MI US 11
16 CALEDONIA 4.4 WNW,MI US 3
17 CALEDONIA 4.4 WNW,MI US 2
18 CALEDONIA 4.4 WNW,MI US 1
19 CALEDONIA 4.4 WNW,MI US 7
20 CALEDONIA 4.4 WNW,MI US 6
21 CALEDONIA 4.4 WNW,MI US 5
22 CALEDONIA 4.4 WNW,MI US 4
23 ROCKFORD 5.0 ESE,MI US 8
24 ROCKFORD,MI US 7
25 ROCKFORD,MI US 4
26 ROCKFORD,MI US 5
27洛克福德,美国密歇根州2 28洛克福德,美国密歇根州3 29 GRAND RAPIDS 2.9 NW,MI US 4
...... ...... 215 ALLENDALE 1.3 SSW,MI US 11
216 BELMONT 1.0 WNW,MI US 2
217 BELMONT 1.0 WNW,MI US 5
218 BELMONT 1.0 WNW,MI US 4
219 BELMONT 1.0 WNW,MI US 7
220 BELMONT 1.0 WNW,MI US 6
221 BELMONT 1.0 WNW,MI US 9
222 BELMONT 1.0 WNW,MI US 8
223 GRAND RAPIDS 2.5 ENE,MI US 7
224 JENISON 1.0 NE,MI US 10
225 JENISON 1.0 NE,MI US 11
226 GRAND RAPIDS GERALD R FORD INTERNATIONAL AIRPO ... 12
227 GRAND RAPIDS 2.9 NW,MI US 9
228 GRAND RAPIDS GERALD R FORD INTERNATIONAL AIRPO ... 10
229 GRAND RAPIDS GERALD R FORD INTERNATIONAL AIRPO ... 11
230 EAST GRAND RAPIDS,MI US 12
231 EAST GRAND RAPIDS,MI US 11
232 EAST GRAND RAPIDS,MI US 10
233 COMSTOCK PARK 3.7 NNE,MI US 8
234 KENTWOOD 1.6 WSW,MI US 7
235 GRAND RAPIDS 2.5 ENE,MI US 11
236 JENISON 1.0 NE,MI US 8
237 JENISON 1.0 NE,MI US 9
238 JENISON 1.0 NE,MI US 6
239 JENISON 1.0 NE,MI US 7
240 JENISON 1.0 NE,MI US 4
241 JENISON 1.0 NE,MI US 5
242 COMSTOCK PARK 3.7 NNE,MI US 11
243 COMSTOCK PARK 1.0 NNW,MI US 7
244 ADA 1.8 W,MI US 1

  AverageSnowFall   

0 0.000000
1 0.000000
2 0.948387
3 0.400000
4 0.000000
5 0.000000
6 0.000000
7 0.000000
8 0.000000
9 1.150000
10 0.000000
11 0.000000
12 2.390000
13 0.000000
14 0.000000
15 0.025000
16 0.408696
17 0.816667
18 0.647826
19 0.000000
20 0.000000
21 0.000000
22 0.415000
23 0.000000
24 0.000000
25 0.900000
26 0.000000
27 1.020000
28 0.928571
29 0.362500
.. ...
215 0.050000
216 0.172727
217 0.025000
218 0.000000
219 0.000000
220 0.000000
221 0.000000
222 0.000000
223 0.000000
224 0.000000
225 0.000000
226 1.193548
227 0.000000
228 0.000000
229 0.026667
230 1.264516
231 0.033333
232 0.000000
233 0.000000
234 0.000000
235 1.000000
236 0.000000
237 0.000000
238 0.000000
239 0.000000
240 0.000000
241 0.000000
242 0.000000
243 0.000000
244 0.609677

[245行×3列] 回溯(最近一次调用最后一次):

文件&#34;&#34;,第1行,in     runfile(&#39; U:/CISP253/Final/temp.py' ;, wdir =&#39; U:/ CISP253 / Final&#39;)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ spyder \ utils \ site \ sitecustomize.py&#34;,第705行,在runfile中     execfile(filename,namespace)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ spyder \ utils \ site \ sitecustomize.py&#34;,102行,在execfile中     exec(compile(f.read(),filename,&#39; exec&#39;),命名空间)

文件&#34; U:/CISP253/Final/temp.py" ;,第5行,在     data = data.sort_values(by = [&#39; AverageSnowFall&#39;])

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py&#34;,第3619行,sort_values     k = self.xs(by,axis = other_axis).values

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ generic.py&#34;,第2335行,xs     返回自我[键]

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py&#34;,第2139行, getitem     return self._getitem_column(key)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ frame.py&#34;,第2146行,在_getitem_column中     return self._get_item_cache(key)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ generic.py&#34;,第1842行,在_get_item_cache中     values = self._data.get(item)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ internals.py&#34;,第3843行,获取     loc = self.items.get_loc(item)

文件&#34; C:\ Users \ dwillaford \ AppData \ Local \ Continuum \ anaconda3 \ lib \ site-packages \ pandas \ core \ indexes \ base.py&#34;,第2527行,在get_loc中     return self._engine.get_loc(self._maybe_cast_indexer(key))

pandas._libs.index.IndexEngine.get_loc

pandas._libs.index.IndexEngine.get_loc

文件&#34; pandas / _libs / hashtable_class_helper.pxi&#34;,第1265行,在pandas._libs.hashtable.PyObjectHashTable.get_item

文件&#34; pandas / _libs / hashtable_class_helper.pxi&#34;,第1273行,pandas._libs.hashtable.PyObjectHashTable.get_item

KeyError:&#39; AverageSnowFall&#39;

1 个答案:

答案 0 :(得分:0)

此示例是为使用Pandas dataframe量身定制的。您可以在csv中读取数据帧,按不同列的值排序,获取数据帧的子集等。这就是我为您的示例所做的事情:

import pandas as pd
data = pd.read_csv('filteredData.csv')
# Sort dataframe by Average Snow Fall column
data = data.sort_values(by=['Average Snow Fall'])
# Get the top 3 locations by average snow fall
print data['Location'].iloc[:3]