我一直在研究一个程序来对CSV进行一些解析,并安装了Anaconda2,并通过pip更新了最新的pandas软件包。但由于某些原因,无论程序多么简单,我都会在尝试使用get_loc时遇到错误。
import sys
import pandas as pd
# Load the CSV into a DataFrame.
infile = pd.read_csv("Path/to/CSV")
# NOTE(review): this is the call that raises the AttributeError quoted below.
# get_loc is a method of the column Index (infile.columns), not of the
# DataFrame itself.
loc_num = infile.get_loc('ColumnName')
print loc_num
这就是错误:
loc_num = infile.get_loc('Item')
AttributeError: 'DataFrame' object has no attribute 'get_loc'
有人知道我哪里做错了吗?会不会是 pandas 没有正确安装?
编辑:这是我正在编写的完整程序,以便您了解我要处理的问题。
import pandas as pd # import pandas library for csv processing, abbreviating it pd
import sys # import sys for writing to terminal
import Tkinter as tk
import tkFileDialog, ttk
# Paths returned by the open/save file dialogs are appended here.
file_loc_list = list()
# Starts empty. NOTE(review): presumably intended for the collected form
# answers -- confirm against the Files class.
master_list = list()
class Files(tk.Frame):
    """Form that collects image-column settings and the CSV / text file paths.

    The frame holds the entry widgets for the form answers plus four buttons:
    'Input Data' snapshots the current answers, 'Select CSV File' and
    'Save Text File' append the chosen path to the module-level
    ``file_loc_list``, and 'Process' reads the CSV and lists the columns that
    come before the chosen final column.
    """

    def __init__(self):
        """Build the widgets, the button callbacks and the dialog options."""
        tk.Frame.__init__(self)
        # Answers captured by data_listing(), always in this order:
        # [final column, count mode, number of images,
        #  text before number, text after number, image width]
        master_list = []

        # define final column entry
        final_col_l = tk.Label(self, text="What is the final column you want before the images?", wraplength=150)
        final_col_l.grid(row=0)
        final_col_e = tk.Entry(self)
        final_col_e.grid(row=0, column=1)

        # define what is used to count the images
        img_count_l = tk.Label(self, text="Is this a calendar strip?")
        img_count_l.grid(row=1)
        img_count = tk.StringVar()
        img_countnumber = ttk.Radiobutton(self, text="No", variable=img_count, value="number")
        img_countday5 = ttk.Radiobutton(self, text='Yes (5 Days)', variable=img_count, value='day5')
        img_countday7 = ttk.Radiobutton(self, text='Yes (7 Days)', variable=img_count, value='day7')
        img_countnumber.grid(row=2, column=1)
        img_countday5.grid(row=3, column=1)
        img_countday7.grid(row=4, column=1)

        # define number of images selection
        n_images_l = tk.Label(self, text="How many images are you creating?", wraplength=150)
        n_images_l.grid(row=5, column=0)
        self.n_images_box_value = tk.IntVar()
        n_images = ttk.Combobox(self, textvariable=self.n_images_box_value)
        n_images.grid(row=5, column=1)
        n_images['values'] = (3, 4, 5, 6, 9)

        # define text before image numbers
        img_name1_l = tk.Label(self, text="What is before the number/day in the image columns?", wraplength=150)
        img_name1_l.grid(row=6)
        img_name1_e = tk.Entry(self)
        img_name1_e.grid(row=6, column=1)

        # define text after image numbers
        img_name2_l = tk.Label(self, text="What is after the number/day in the image columns (if none, enter nothing)?", wraplength=150)
        img_name2_l.grid(row=7)
        img_name2_e = tk.Entry(self)
        img_name2_e.grid(row=7, column=1)

        # define image width
        width_l = tk.Label(self, text="What is the width of images you want?", wraplength=150)
        width_l.grid(row=8)
        width_e = tk.Entry(self)
        width_e.grid(row=8, column=1)

        def data_listing():
            """Snapshot the current form answers into master_list."""
            # Start from an empty list so that pressing 'Input Data' again
            # replaces the earlier answers instead of piling new ones behind
            # them (doit() always reads index 0).
            del master_list[:]
            master_list.append(final_col_e.get())
            master_list.append(img_count.get())
            master_list.append(self.n_images_box_value.get())
            master_list.append(img_name1_e.get())
            master_list.append(img_name2_e.get())
            master_list.append(width_e.get())

        def doit():
            """Return the names of the CSV columns before the chosen final column."""
            infile = pd.read_csv(file_loc_list[0])
            # get_loc is a method of the column Index; a DataFrame has no
            # get_loc attribute of its own.
            loc_num = infile.columns.get_loc(master_list[0])
            # Exclusive slice: the named column itself is left out
            # (slice up to loc_num + 1 to include it).
            return list(infile.columns[:loc_num])

        # define button
        tk.Button(self, text='Input Data', command=data_listing).grid(row=9, column=0)
        tk.Button(self, text='Select CSV File', command=self.getcsvfile).grid(row=9, column=1)
        tk.Button(self, text='Save Text File', command=self.gettextfile).grid(row=10, column=1)
        tk.Button(self, text='Process', command=doit).grid(row=11, column=1)

        # Parent window for both dialogs: the toplevel that contains this
        # frame, so the class does not rely on a global named `root`.
        dialog_parent = self.winfo_toplevel()

        # define options for opening a file
        self.open_opt = options = {}
        options['defaultextension'] = '.csv'
        options['filetypes'] = [('all files', '.*'), ('CSV files', '.csv')]
        options['parent'] = dialog_parent
        options['title'] = 'Open a CSV file.'

        # define options for saving a file
        self.save_opt = options = {}
        options['defaultextension'] = '.txt'
        options['filetypes'] = [('all files', '.*'), ('text files', '.txt')]
        options['parent'] = dialog_parent
        options['title'] = 'Save a text file.'

    def getcsvfile(self):
        """Ask for a CSV file to open and record its path in file_loc_list."""
        # get filename
        csvfile = tkFileDialog.askopenfilename(**self.open_opt)
        # A cancelled dialog yields an empty value; do not record it as a path.
        if csvfile:
            file_loc_list.append(csvfile)

    def gettextfile(self):
        """Ask where to save the text file and record the path in file_loc_list."""
        textfile = tkFileDialog.asksaveasfilename(**self.save_opt)
        # A cancelled dialog yields an empty value; do not record it as a path.
        if textfile:
            file_loc_list.append(textfile)
if __name__ == '__main__':
    # Create the main Tk window, place the form in it and run the event loop.
    # The window is kept under the module-level name `root`.
    root = tk.Tk()
    app = Files()
    app.grid()
    root.mainloop()
答案 0 :(得分:1)
我认为您需要加上 `columns`,因为 `get_loc` 是列索引的方法(`Index.get_loc`),`DataFrame` 本身没有这个方法。最后用 `iloc` 选择所需的列:
import pandas as pd
import io

# Sample data. Every row has five fields under a four-name header, so
# read_csv uses the first field of each row as the index.
temp = u"""A,B,C,D
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15"""

# After testing, replace io.StringIO(temp) with the CSV file name.
infile = pd.read_csv(io.StringIO(temp))
print (infile)
#     A   B   C   D
# 0   0   1   2   3
# 1   4   5   6   7
# 2   8   9  10  11
# 3  12  13  14  15

# get_loc is called on the column Index, not on the DataFrame.
loc_num = infile.columns.get_loc('C')
print (loc_num)
# 2

# Columns before 'C' (the slice end is exclusive).
print (infile.iloc[:, :loc_num])
#     A   B
# 0   0   1
# 1   4   5
# 2   8   9
# 3  12  13

# Columns up to and including 'C'.
print (infile.iloc[:, :loc_num + 1])
#     A   B   C
# 0   0   1   2
# 1   4   5   6
# 2   8   9  10
# 3  12  13  14