
时间:2014-02-27 21:26:35

标签: python python-2.7 csv iteration import-from-csv

我一直在研究我的python技能。 这是我正在处理的数据的原始文本文件:Titanic data


我试图分三个阶段来做这件事: 首先,添加一个与该人相关的前缀列(Mr,Mrs,Miss)。 然后,定义一个函数 - get_avg()来标识将找到信息的列以及该列的可能值,并将它们提供给grab_values函数。 第三,grab_values()计算每组人数和幸存者数量。

这一切看起来都很好……但它不起作用。 计数和总和我得到的始终是 0。我尝试尽可能多地插入 print 语句来调试,也取得了一些进展,但仍然不明白应该怎么做。我觉得这个函数没有在所有行(甚至任何一行)上运行,但不确定这是否是真正的原因,也不知道该如何解决。


import csv

# Raw passenger data. Binary mode is what the Python 2 csv module expects.
titanic = open('shorttitanic.txt', "rb")
reader = csv.reader(titanic)

# Three-character prefixes looked for in a passenger's name.
prefix_list = ["Mr ", "Mrs", "Mis"]

# Demographic categories that passengers and survivors can be counted by.
# Each entry is [column number (0-based), possible value, possible value, ...].
details = {
    "embarked": [5, "Southampton", "Cherbourg", "Queenstown", ""],
    "sex": [10, "male", "female"],
    "pclass": [1, "1st", "2nd", "3rd"],
    "prefix": [12, "Mr ", "Mrs", "Mis"],
}

# Adding another column for prefix.
# NOTE: this loop reads `reader` to the end; a csv.reader can only be iterated
# once, so any later `for row in reader` will not see these rows again.
rownum = 0
for row in reader:
    if rownum == 0:
        # The first row is the header: keep it aside, it holds no passenger name.
        header = row
    else:
        # The prefix starts after the comma and the space that follows it (+2).
        # When the name has no comma, find() returns -1 and the slice simply
        # will not match any known prefix.
        prefix_location = row[3].find(",") + 2
        prefix = row[3][prefix_location:prefix_location+3]  # first 3 characters of the prefix
        if prefix in prefix_list:
            if prefix == "Mis":
                row.append("Miss")              # "Mis" is only the 3-char lookup form of "Miss"
            else:
                row.append(prefix)
        else:
            row.append("Other/Unknown")         # no known prefix in the passenger's name

    rownum += 1

# grab_values() will run on all rows and count the number of passengers in each demographic and the number of survivors
def grab_values(col_num,i):
    print col_num, "item name", i
    count = 0
    tot = 0
    for row in reader:
#        print type(row[col_num][0]
        print row[col_num]
        if row[col_num] == i:
            count += 1
            if row[2] == int(1):
                tot += 1
#        print count, tot
    return count, tot

# get_avg() finds the column number and possible values of demographic x.

def get_avg(x):             # x is the category (sex, embarked...)
    col_num = details[x][0]
    for i in details[x][1:]:
        print col_num, i
#        print type(i)


        count,tot = grab_values(col_num,i)
        print count,tot

#        print i, count, tot





import csv

# Raw passenger data. Binary mode is what the Python 2 csv module expects.
titanic = open('titanic.txt', "rb")
reader = csv.reader(titanic)

# Prefixes looked for in a passenger's name. Three characters are used
# because the shortest prefix, "Mr", is matched together with its trailing space.
prefix_list = ["Mr ", "Mrs", "Mis"]

# Demographic categories that passengers and survivors can be counted by.
# Each entry is [column number (0-based), possible value, possible value, ...].
details = {
    "embarked": [5, "Southampton", "Cherbourg", "Queenstown", ""],
    "sex": [10, "male", "female"],
    "pclass": [1, "1st", "2nd", "3rd"],
    "prefix": [11, "Mr ", "Mrs", "Miss", "Unknown"],
}

# TODO: see how the "prefix" values in `details` can be created from column 11
# and a reference to prefix_list instead of being listed by hand.

# Here we'll do 2 things:
# I - Add another column for prefix, and -
# II - Create processed_list with each of the data rows in reader. A csv.reader
# can only be run over once, so the counting functions iterate processed_list
# instead.

processed_list = []
rownum = 0
for row in reader:
    if rownum == 0:
        # The first row is the header: keep it aside so it is never counted
        # as a passenger (it also has no prefix column).
        header = row
    else:
        # The prefix starts after the comma and the space that follows it (+2).
        # When the name has no comma, find() returns -1 and the slice simply
        # will not match any known prefix.
        prefix_location = row[3].find(",") + 2
        prefix = row[3][prefix_location:prefix_location+3]  # first 3 characters of the prefix

        if prefix in prefix_list:
            if prefix == "Mis":
                row.append("Miss")              # "Mis" is only the 3-char lookup form of "Miss"
            else:
                row.append(prefix)
        else:
            row.append("Unknown")               # no known prefix in the passenger's name

        processed_list.append(row)              # keep the finished row for later passes

    rownum += 1

titanic.close()     # every data row now lives in processed_list

# grab_values() will run on all rows and count the number of passengers in each demographic and the number of survivors
def grab_values(col_num,i):
    """Count passengers whose column `col_num` equals `i`, and the survivors among them.

    Runs over the module-level `processed_list`. A passenger counts as a
    survivor when column 2 holds the string "1".
    Returns a (number on board, number survived) tuple.
    """
    on_board = [row for row in processed_list if row[col_num] == i]
    survivors = [row for row in on_board if row[2] == "1"]
    return len(on_board), len(survivors)

# get_avg() finds the column number and possible values of demographic x.

def get_avg(x):             # x is the category (sex, embarked...)
    col_num = details[x][0]
    for i in details[x][1:]:
        print "Looking for: ", i, "at col num: ", col_num


        num_on_board,num_survived = grab_values(col_num,i)

            proportion_survived = float(num_survived)/num_on_board
        except ZeroDivisionError:
            proportion_survived = "Cannot be calculated"

        print "Number of %s passengers on board: " %i ,  num_on_board, "\n" \
              "Number of %s passengers survived: " %i, num_survived, "\n" \
              "Proportion of %s passengers survived: " %i,  "%.2f%%" % (proportion_survived * 100), "\n"

# Greet the user and list the categories that can be reported on. The
# backslash at the end of the first line continues the string literal itself.
print "Hello! I can calculate the proportion of passengers that survived according to these parameters: \n \
Embarked \n Sex \n Pclass \n Prefix", "\n"

def get_choice():
    possible_choices = ["embarked","sex","pclass","prefix"]
    choice = raw_input("Please enter your choice: ").lower()
    if choice not in possible_choices:
        print "Sorry, I can only work with Embarked / Sex / Pclass / Prefix. Please try again."
    return choice

# NOTE(review): the chosen category is stored here, but nothing in the visible
# code passes it on - presumably get_avg(user_choice) should follow; confirm.
user_choice = get_choice()



1 个答案:

答案 0 :(得分:1)




在您的 grab_values 中使用了 for row in reader:,但 reader 在前面添加前缀列的循环中已经被遍历完了,所以这个循环不会再得到任何行。请改为这样做:

titanic  = open('titanic.txt', "rb")
reader = csv.reader(titanic)

# Adding another column for prefix, and keeping every data row in a list so
# that the rows can be run over again later (reader can only be read once):
processed_list = [] # Declare a new list
rownum = 0
for row in reader:
    if rownum == 0:
        header = row                    # first row is the header, not a passenger
    else:
        prefix_location = row[3].find(",") + 2
        prefix = row[3][prefix_location:prefix_location+3]
        if prefix in prefix_list:
            if prefix == "Mis":
                row.append("Miss")
            else:
                row.append(prefix)
        else:
            row.append("Other/Unknown")
        processed_list.append(row) #Change this: store the finished row for later passes
    rownum += 1

在 grab_values 中,将 for row in reader 更改为 for row in processed_list。