我有一个 Excel 电子表格,想用 xlrd 来解析。该电子表格大量使用了命名范围。
如果我使用:
# Print every defined name in the workbook.
# NOTE(review): `book` is presumably an xlrd Book opened earlier — confirm.
for name in book.name_map:
    print(name)
我可以看到所有的名字都在那里。
但是我无法让任何相关方法(cell 方法和 area2d)正常工作。有人能给我一个语法示例,说明如何读取名称所指向的单元格区域吗?
该 Excel 文件是一个 XLSM 文件,其中包含大量 Visual Basic(VBA)代码,这些代码也会操作这些命名范围。
答案 0 :(得分:1)
我认为 xlrd 对命名范围的支持在 XLSM 文件上存在问题,不过我改用 openpyxl 后找到了解决办法。它提供了 get_named_ranges() 函数,可以返回所有命名范围。除此之外的支持比较薄弱,所以我自己编写了一个类,把电子表格中的命名范围转换成对象,这样就可以用相同的名称访问相同的信息。
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 14 09:42:09 2016
@author: ellwood
"""
from openpyxl import load_workbook
class NamedArray(object):
    """Cell area (or single cell) that a workbook defined name points to.

    The object is built from the textual form of a named range, i.e.
    ``<sheet part>!<cell part>`` where the cell part is either one address
    (``$B$2``) or two addresses joined by ``:`` (``$B$2:$D$3``).

    Attributes set by ``__init__``:
        wb        -- the workbook that was passed in
        sheet     -- sheet title extracted from the sheet part
        loc       -- ``workbook[sheet]``
        has_range -- True when the cell part contains ``:``
        has_value -- True when the cell part is a single address
        ad_lo     -- first address with ``$`` removed
        ad_hi     -- second address with ``$`` removed (== ad_lo for one cell)
        min_row, max_row, min_col, max_col -- 1-based bounds of the area
    """

    C_CAPS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self, workbook, named_range_raw):
        """Parse ``str(named_range_raw)`` and look the sheet up in ``workbook``.

        Raises ValueError when the text contains no ``!`` separator or the
        cell part does not split into exactly two addresses around ``:``.
        """
        self.wb = workbook
        # Split on the LAST '!' so a sheet title containing '!' still parses.
        sheet_str, cellrange_str = str(named_range_raw).rsplit('!', 1)
        if '"' in sheet_str:
            # Form such as <Worksheet "Title">: the title sits between quotes.
            self.sheet = sheet_str.split('"')[1]
        else:
            # Plain or single-quoted sheet title, e.g. 'My Sheet'.
            self.sheet = sheet_str.strip("'")
        self.loc = self.wb[self.sheet]

        if ':' in cellrange_str:
            self.has_range = True
            self.has_value = False
            lo, hi = cellrange_str.split(':')
            self.ad_lo = lo.replace('$', '')
            self.ad_hi = hi.replace('$', '')
        else:
            self.has_range = False
            self.has_value = True
            self.ad_lo = cellrange_str.replace('$', '')
            self.ad_hi = self.ad_lo

        self.min_row = self.get_row(self.ad_lo)
        self.max_row = self.get_row(self.ad_hi)
        self.min_col = self.col_to_n(self.ad_lo)
        self.max_col = self.col_to_n(self.ad_hi)
        # Stored privately and exposed through the read-only ``rows`` /
        # ``cols`` properties; plain attributes with those names would hide
        # any class-level definition of the same name.
        self._rows = self.max_row - self.min_row + 1
        self._cols = self.max_col - self.min_col + 1

    def size_of(self):
        """Return the two dimensional size of the area as ``(cols, rows)``."""
        return self._cols, self._rows

    @property
    def cols(self):
        """Number of columns in the named area."""
        return self._cols

    @property
    def rows(self):
        """Number of rows in the named area."""
        return self._rows

    def value(self, r=1, c=1):
        """Return the value at 1-based row ``r``, column ``c`` of the area.

        For a single-cell name the arguments are ignored and that cell's
        value is returned.

        Raises IndexError when ``r`` or ``c`` lies outside the area.
        """
        if self.has_value:
            return self.loc[self.ad_lo].value
        if not 1 <= r <= self._rows:
            raise IndexError('row %r outside 1..%d' % (r, self._rows))
        if not 1 <= c <= self._cols:
            raise IndexError('column %r outside 1..%d' % (c, self._cols))
        coord = self.n_to_col(self.min_col + c - 1) + str(self.min_row + r - 1)
        # Index the sheet by coordinate string rather than calling cell()
        # with a positional coordinate.
        return self.loc[coord].value

    def is_range(self):
        """Return True when the name covers a range written as ``lo:hi``."""
        return self.has_range

    def is_value(self):
        """Return True when the name points at a single cell."""
        return self.has_value

    def __str__(self):
        """Return a printable description of the named area."""
        if self.is_range():
            locs = 's ' + self.ad_lo + ':' + self.ad_hi
        else:
            locs = ' ' + self.ad_lo
        return ('named range' + str(self.size_of()) + ' in sheet ' + self.sheet
                + ' @ location' + locs)

    @classmethod
    def get_row(cls, ad):
        """Return the row number of a cell address such as ``"ABC123"`` (123).

        Only the decimal digits of ``ad`` are used; other characters are
        skipped. Returns 0 when ``ad`` contains no digits.
        """
        row = 0
        for ch in ad:
            if ch in "1234567890":
                row = row * 10 + int(ch)
        return row

    @classmethod
    def col_to_n(cls, ad):
        """Return the 1-based column number of a cell address.

        Only the upper-case letters of ``ad`` are used, read as a bijective
        base-26 number: ``A`` -> 1, ``Z`` -> 26, ``AA`` -> 27.
        """
        n = 0
        for ch in ad:
            if ch in cls.C_CAPS:
                n = n * 26 + cls.C_CAPS.find(ch) + 1
        return n

    @classmethod
    def n_to_col(cls, n):
        """Return the column letters for a 1-based column number.

        Inverse of ``col_to_n``: 1 -> ``A``, 26 -> ``Z``, 27 -> ``AA``.
        Returns an empty string for ``n <= 0``.
        """
        ad = ''
        while n > 0:
            # Column letters have no zero digit, so shift by one before
            # dividing; otherwise multiples of 26 come out as e.g. 'AZ'.
            n, rem = divmod(n - 1, 26)
            ad = cls.C_CAPS[rem] + ad
        return ad
class Struct(object):
    """Turn keyword arguments (e.g. an unpacked dict) into instance attributes.

    ``Struct(a=1).a`` evaluates to ``1``.
    """

    def __init__(self, **entries):
        """Store every keyword argument as an attribute of the same name."""
        self.__dict__.update(entries)

    def __repr__(self):
        """Return ``<name : repr(value)>`` with one attribute per line."""
        body = '\n '.join(
            '%s : %s' % (key, repr(val)) for key, val in self.__dict__.items()
        )
        return '<%s>' % body

    # The method was originally spelled ``repr__``; keep that name callable.
    repr__ = __repr__
def get_names(workbook):
    """Return a Struct holding a NamedArray for each 'n_'-prefixed name.

    Every named range returned by ``workbook.get_named_ranges()`` whose name
    starts with ``'n_'`` is wrapped in a NamedArray and stored under the name
    with that prefix removed. Each stored entry is also printed.

    NOTE(review): ``get_named_ranges()`` is the call the original code used;
    confirm it exists in the installed openpyxl version.
    """
    name_list = {}
    for named_range in workbook.get_named_ranges():
        name = named_range.name
        # Only keep the names beginning with 'n_'.
        if name.startswith('n_'):
            # Use the workbook passed in, not a module-level global.
            name_list[name[2:]] = NamedArray(workbook, str(named_range))
    for key, named_array in name_list.items():
        print(key, '=', named_array)
    return Struct(**name_list)
# ------------------
# Program example: load a workbook, collect its 'n_' names, print one value.
# -----------------
# `wb` is bound at module level.
wb = load_workbook('test.xlsm', data_only=True)
# get_names() strips the 'n_' prefix, so a defined name 'n_my_name' is
# reachable as n.my_name.
n = get_names(wb)
# value() called without arguments uses its defaults r=1, c=1.
print(n.my_name.value())
一个小优化是:我把所有感兴趣的名称都加上了 'n_' 前缀,并且只导入以 'n_' 开头的名称,这样就可以忽略所有 Excel 内置名称。希望这对大家有所帮助。