我只想把 Excel 文件中的某些列读入 pandas 数据帧。我想通过 usecols 参数
以列范围的形式来指定这些列,但是遇到了错误。用字母指定列范围的正确方法是什么(是的,我知道也可以提供列索引)?
import pandas as pd
df = pd.read_excel("test.xlsx", usecols="A:AV", header=None)
~/anaconda3/lib/python3.5/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, dtype, true_values, false_values, engine, squeeze, **kwds) 207 skip_footer=skip_footer, converters=converters, dtype=dtype, 208 true_values=true_values, false_values=false_values, squeeze=squeeze, --> 209 **kwds) 210 211
~/anaconda3/lib/python3.5/site-packages/pandas/io/excel.py in _parse_excel(self, sheetname, header, skiprows, names, skip_footer, index_col, has_index_names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, true_values, false_values, verbose, dtype, squeeze, **kwds) 508 squeeze=squeeze, 509 dtype=dtype, --> 510 **kwds) 511 512 output[asheetname] = parser.read()
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in TextParser(*args, **kwds) 1910 """ 1911 kwds['engine'] = 'python' --> 1912 return TextFileReader(*args, **kwds) 1913 1914
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds) 762 self.options['has_index_names'] = kwds['has_index_names'] 763 --> 764 self._make_engine(self.engine) 765 766 def close(self):
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in _make_engine(self, engine) 993 ' "c", "python", or' ' "python-fwf")'.format( 994 engine=engine)) --> 995 self._engine = klass(self.f, **self.options) 996 997 def _failover_to_python(self):
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in __init__(self, f, **kwds) 1994 # infer column indices from self.usecols if it is specified. 1995 self._col_indices = None --> 1996 self.columns, self.num_original_columns = self._infer_columns() 1997 1998 # Now self.columns has the set of columns that we will process.
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in _infer_columns(self) 2387 else: 2388 columns = [lrange(ncols)] --> 2389 columns = self._handle_usecols(columns, columns[0]) 2390 else: 2391 if self.usecols is None or len(names) >= num_original_columns:
~/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in _handle_usecols(self, columns, usecols_key) 2422 for col in self.usecols: 2423 if isinstance(col, string_types): --> 2424 col_indices.append(usecols_key.index(col)) 2425
else: 2426 col_indices.append(col)
In [20]: pd.__version__
Out[20]: '0.20.2'
文件:test.xlsx
答案 0 :(得分:2)
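你需要升级到 pandas 0.21.0 或更高版本(参见开发讨论)。在 0.20.x 中,read_excel 还没有 usecols 参数,它会经由 **kwds 传给文本解析器,字符串 "A:AV" 被逐个字符当作列名查找,因而报错;从 0.21.0 起,usecols 取代了 parse_cols,可以用字母指定列范围。
升级之后: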
df = pd.read_excel("test.xlsx", usecols='A:B', header=None)
print (df.head())
0 1
0 sequence 2015-02-11 00:00:00
1 Aa 6239
2 AaBpL5 NaN
3 AaCjL5 NaN
4 AaDrL2 NaN