尝试使用get_loc时出现Pandas错误

时间:2016-06-08 15:37:07

标签: python csv pandas dataframe

我一直在研究一个程序来对CSV进行一些解析,并安装了Anaconda2,并通过pip更新了最新的pandas软件包。但由于某些原因,无论程序多么简单,我都会在尝试使用get_loc时遇到错误。

import sys
import pandas as pd

# Load the CSV into a DataFrame.
infile = pd.read_csv("Path/to/CSV")

# NOTE: this is the call that raises the AttributeError quoted below.
# get_loc is a method of Index objects (e.g. infile.columns), not of DataFrame.
loc_num = infile.get_loc('ColumnName')

print loc_num

这就是错误:

  

loc_num = infile.get_loc('Item')
AttributeError: 'DataFrame' object has no attribute 'get_loc'

有人知道我哪里做错了吗?会不会是 pandas 没有正确安装?

编辑:这是我正在编写的完整程序,方便您了解我要处理的问题。

import pandas as pd  # import pandas library for csv processing, abbreviating it pd
import sys  # import sys for writing to terminal
import Tkinter as tk
import tkFileDialog, ttk

# Paths picked in the file dialogs: Files.getcsvfile and Files.gettextfile
# append to this list, and the "Process" callback loads file_loc_list[0].
file_loc_list = []
# NOTE: Files.__init__ assigns its own local master_list, and that local list
# is the one its callbacks use; nothing visible here reads this module-level one.
master_list=[]

class Files(tk.Frame):
    """Tk form that collects CSV-processing options and file locations.

    The frame lays out input widgets for the processing options and four
    buttons: record the options, pick the input CSV, pick the output text
    file, and run the column selection.

    Relies on two module-level names: ``root`` (parent window of the file
    dialogs) and ``file_loc_list`` (list the chosen paths are appended to).
    """

    def __init__(self):
        """Build the widgets, the button callbacks and the dialog options."""
        tk.Frame.__init__(self)

        # Local to this frame; the nested callbacks share it via closure.
        master_list = []

        # define final column entry
        final_col_l = tk.Label(self, text="What is the final column you want before the images?", wraplength=150)
        final_col_l.grid(row=0)
        final_col_e = tk.Entry(self)
        final_col_e.grid(row=0, column=1)

        # define what is used to count the images
        img_count_l = tk.Label(self, text="Is this a calendar strip?")
        img_count_l.grid(row=1)
        img_count = tk.StringVar()
        img_countnumber = ttk.Radiobutton(self, text="No", variable=img_count, value="number")
        img_countday5 = ttk.Radiobutton(self, text='Yes (5 Days)', variable=img_count, value='day5')
        img_countday7 = ttk.Radiobutton(self, text='Yes (7 Days)', variable=img_count, value='day7')
        img_countnumber.grid(row=2, column=1)
        img_countday5.grid(row=3, column=1)
        img_countday7.grid(row=4, column=1)

        # define number of images selection
        n_images_l = tk.Label(self, text="How many images are you creating?", wraplength=150)
        n_images_l.grid(row=5, column=0)
        self.n_images_box_value = tk.IntVar()
        n_images = ttk.Combobox(self, textvariable=self.n_images_box_value)
        n_images.grid(row=5, column=1)
        n_images['values'] = (3, 4, 5, 6, 9)

        # define text before image numbers
        img_name1_l = tk.Label(self, text="What is before the number/day in the image columns?", wraplength=150)
        img_name1_l.grid(row=6)
        img_name1_e = tk.Entry(self)
        img_name1_e.grid(row=6, column=1)

        # define text after image numbers
        img_name2_l = tk.Label(self, text="What is after the number/day in the image columns (if none, enter nothing)?", wraplength=150)
        img_name2_l.grid(row=7)
        img_name2_e = tk.Entry(self)
        img_name2_e.grid(row=7, column=1)

        # define image width
        width_l = tk.Label(self, text="What is the width of images you want?", wraplength=150)
        width_l.grid(row=8)
        width_e = tk.Entry(self)
        width_e.grid(row=8, column=1)

        def data_listing():
            """Store the current form values in ``master_list``.

            The contents are replaced in place (not appended) so that pressing
            the button again refreshes the values; appending would leave the
            first, stale set of answers at the front of the list.
            """
            master_list[:] = [
                final_col_e.get(),
                img_count.get(),
                self.n_images_box_value.get(),
                img_name1_e.get(),
                img_name2_e.get(),
                width_e.get(),
            ]

        def doit():
            """Return the column names that come before the chosen final column.

            Reads the CSV at ``file_loc_list[0]`` and looks up the position of
            the column named by ``master_list[0]``; that column itself is not
            included in the result.
            """
            infile = pd.read_csv(file_loc_list[0])
            # get_loc is a method of the column Index, not of the DataFrame.
            # NOTE(review): assumes column names are unique, so that get_loc
            # returns a plain integer position.
            loc_num = infile.columns.get_loc(master_list[0])
            return list(infile.columns.values[:loc_num])

        # define button
        tk.Button(self, text='Input Data', command=data_listing).grid(row=9, column=0)
        tk.Button(self, text='Select CSV File', command=self.getcsvfile).grid(row=9, column=1)
        tk.Button(self, text='Save Text File', command=self.gettextfile).grid(row=10, column=1)
        tk.Button(self, text='Process', command=doit).grid(row=11, column=1)

        # define options for opening a file
        self.open_opt = options = {}
        options['defaultextension'] = '.csv'
        options['filetypes'] = [('all files', '.*'), ('CSV files', '.csv')]
        options['parent'] = root
        options['title'] = 'Open a CSV file.'

        # define options for saving a file
        self.save_opt = options = {}
        options['defaultextension'] = '.txt'
        options['filetypes'] = [('all files', '.*'), ('text files', '.txt')]
        options['parent'] = root
        options['title'] = 'Save a text file.'

    def getcsvfile(self):
        """Ask for the CSV to open and record its path in ``file_loc_list``."""
        csvfile = tkFileDialog.askopenfilename(**self.open_opt)

        # A cancelled dialog yields an empty value; do not record it as a path.
        if csvfile:
            file_loc_list.append(csvfile)

    def gettextfile(self):
        """Ask where to save the text file and record the path in ``file_loc_list``."""
        textfile = tkFileDialog.asksaveasfilename(**self.save_opt)

        # A cancelled dialog yields an empty value; do not record it as a path.
        if textfile:
            file_loc_list.append(textfile)

if __name__=='__main__':
    # root has to be a module-level name created before Files() is built:
    # Files.__init__ reads it as the parent of the file dialogs.
    root = tk.Tk()
    Files().grid()
    root.mainloop()

1 个答案:

答案 0 :(得分:1)

我认为您需要加上 columns,因为 get_loc 是 Index 的方法(Index.get_loc),DataFrame 本身没有这个方法。最后用 iloc 选择所需的列:

import pandas as pd
import io

# Each data row has one more field than the header row, so read_csv takes the
# first field of every row as the index (compare the printed frame below).
temp=u"""A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15"""
# after testing, replace io.StringIO(temp) with the CSV filename
infile = pd.read_csv(io.StringIO(temp))
print (infile)
    A   B   C   D
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15

# get_loc is called on the column Index (infile.columns), not on the DataFrame.
loc_num = infile.columns.get_loc('C')
print (loc_num)
2
# Columns before 'C': the slice end is exclusive, so 'C' itself is left out.
print (infile.iloc[:, :loc_num])
    A   B
0   0   1
1   4   5
2   8   9
3  12  13

# Add 1 to the slice end to include column 'C' as well.
print (infile.iloc[:, :loc_num + 1])
    A   B   C
0   0   1   2
1   4   5   6
2   8   9  10
3  12  13  14