xlrd命名范围示例?

时间:2016-09-15 08:24:21

标签: python-3.x xlrd

我有一个excel电子表格,我试图用 xlrd 解析。电子表格本身广泛使用命名范围。

如果我使用:

# Print every entry produced by iterating book.name_map.
# NOTE(review): `book` is not defined in this snippet -- presumably an xlrd
# workbook opened earlier; confirm against the surrounding code.
for name in book.name_map:
    print(name)

我可以看到所有的名字都在那里。

但是我无法让任何方法正常工作(cell 方法和 area2d)。谁能给我一个语法示例,说明如何读取名称所指向的单元格区域?

Excel 文件是一个 XLSM 文件,其中包含大量 Visual Basic(VBA)代码,这些代码也会操作这些命名范围。

1 个答案:

答案 0 :(得分:1)

我认为 xlrd 对 XLSM 文件中命名范围的支持存在问题,不过我改用 openpyxl 后找到了解决办法。它有一个函数 get_named_ranges(),可以返回所有命名范围。但在此之后的支持比较薄弱,所以我编写了自己的类,把电子表格中的每个命名范围转换成一个对象,这样我就可以用相同的名称访问相同的信息。

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 14 09:42:09 2016

@author: ellwood
"""

from openpyxl import load_workbook

class NamedArray(object):
    ''' Named range object

    Wraps one named range of a workbook and exposes the sheet it lives on,
    its corner addresses, its size and the values of its cells.

    Attributes set by __init__:
        wb                 the workbook the range was taken from
        sheet              name of the sheet the range lives on
        loc                the sheet object, looked up as workbook[sheet]
        has_range          True when the name covers a "lo:hi" block of cells
        has_value          True when the name points at a single cell
        ad_lo, ad_hi       corner addresses with the '$' markers removed
        min_row, max_row   first and last row number
        min_col, max_col   first and last column number (A == 1)
        rows, cols         read-only size of the range (properties)
    '''
    C_CAPS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self, workbook, named_range_raw):
        ''' Initialise a NamedArray object from the named_range_raw information
            in the given workbook

            workbook        object indexable by sheet name (workbook[sheet])
            named_range_raw anything whose str() looks like <sheet>!<cells>,
                            where <cells> is "A1" or "A1:B2" ('$' allowed).
                            The sheet name is taken from between double quotes
                            when present, otherwise the text before '!' is
                            used with surrounding single quotes removed.

            Raises ValueError when the text contains no '!' separator.
        '''
        self.wb = workbook
        # Split on the LAST '!' so that a sheet name which itself contains
        # '!' does not break the two-way unpacking.
        sheet_str, cellrange_str = str(named_range_raw).rsplit('!', 1)
        quoted = sheet_str.split('"')
        self.sheet = quoted[1] if len(quoted) > 1 else sheet_str.strip("'")
        self.loc = self.wb[self.sheet]

        self.has_range = ':' in cellrange_str
        self.has_value = not self.has_range
        if self.has_range:
            lo, hi = cellrange_str.split(':')
        else:
            # A single cell is treated as a 1x1 range.
            lo = hi = cellrange_str
        self.ad_lo = lo.replace('$', '')
        self.ad_hi = hi.replace('$', '')

        self.min_row = self.get_row(self.ad_lo)
        self.max_row = self.get_row(self.ad_hi)
        self.min_col = self.col_to_n(self.ad_lo)
        self.max_col = self.col_to_n(self.ad_hi)
        # Stored under private names: instance attributes called 'rows' and
        # 'cols' would shadow the accessors of the same name defined below.
        self._rows = self.max_row - self.min_row + 1
        self._cols = self.max_col - self.min_col + 1

    def size_of(self):
        ''' Returns two dimensional size of named space as (cols, rows)
        '''
        return self._cols, self._rows

    @property
    def cols(self):
        ''' Number of cols in named space (read as an attribute: obj.cols)
        '''
        return self._cols

    @property
    def rows(self):
        ''' Number of rows in named space (read as an attribute: obj.rows)
        '''
        return self._rows

    def value(self, r=1, c=1):
        ''' Returns the value at row r, column c

            r and c are 1-based offsets inside the named range.  For a name
            that points at a single cell they are ignored and that cell's
            value is returned.

            Raises IndexError when r or c lies outside the range.
        '''
        # Sheet indexing (sheet['A1']) is used rather than sheet.cell('A1'):
        # the coordinate-string form of cell() is not accepted by newer
        # openpyxl releases.
        if self.has_value:
            return self.loc[self.ad_lo].value

        if not 1 <= r <= self._rows:
            raise IndexError('row %r outside 1..%d' % (r, self._rows))
        if not 1 <= c <= self._cols:
            raise IndexError('column %r outside 1..%d' % (c, self._cols))
        address = self.n_to_col(self.min_col + c - 1) + str(self.min_row + r - 1)
        return self.loc[address].value

    def is_range(self):
        ''' if true then name defines a table more than 1 cell
        '''
        return self.has_range

    def is_value(self):
        ''' if true then name defines the location of a single value
        '''
        return self.has_value

    def __str__(self):
        ''' printed description of named space
        '''
        if self.has_range:
            locs = 's ' + self.ad_lo + ':' + self.ad_hi
        else:
            locs = ' ' + self.ad_lo
        return ('named range' + str(self.size_of()) + ' in sheet ' + self.sheet
                + ' @ location' + locs)

    @classmethod
    def get_row(cls, ad):
        ''' get row number from cell string
            Cell string is assumed to be in excel format i.e "ABC123" where
            row is 123.  Returns 0 when the string contains no digits.
        '''
        row = 0
        for ch in ad:
            if ch in "1234567890":
                row = row * 10 + int(ch)
        return row

    @classmethod
    def col_to_n(cls, ad):
        ''' find column number from xl address
            Cell string is assumed to be in excel format i.e "ABC123" where
            the column is ABC.  Letters are read as bijective base-26 digits
            (A == 1 ... Z == 26), so "A" -> 1, "Z" -> 26, "AA" -> 27.
            Characters outside C_CAPS (digits, lower case) are skipped.
        '''
        n = 0
        for ch in ad:
            if ch in cls.C_CAPS:
                n = n * 26 + cls.C_CAPS.find(ch) + 1
        return n

    @classmethod
    def n_to_col(cls, n):
        ''' make xl column address from column number (inverse of col_to_n)
            1 -> "A", 26 -> "Z", 27 -> "AA".  Returns '' for n <= 0.
        '''
        ad = ''
        while n > 0:
            # Column letters have no zero digit, so shift to 0-based before
            # dividing; otherwise every multiple of 26 gains a spurious
            # leading letter (26 would become "AZ").
            n, digit = divmod(n - 1, 26)
            ad = cls.C_CAPS[digit] + ad
        return ad


class Struct(object):
    ''' class which turns a dictionary into a structure

    Every keyword argument becomes an attribute of the instance, so
    Struct(a=1).a == 1.
    '''
    def __init__(self, **entries):
        self.__dict__.update(entries)

    def __repr__(self):
        ''' Return the attributes as "<name : value" lines closed by ">"
        '''
        # self.__dict__ (with trailing underscores) must be used here: inside
        # a class body a name like self.__dict is name-mangled to
        # self._Struct__dict, which does not exist.  items() replaces the
        # Python 2 only iteritems().
        body = '\n '.join('%s : %s' % (k, repr(v)) for k, v in self.__dict__.items())
        return '<%s>' % body

    # The method was previously published under this misspelled name; keep it
    # resolvable for any caller that referenced it.
    repr__ = __repr__


def get_names(workbook):
    ''' Get a structure containing all of the names in the workbook

    Only names beginning with 'n_' are kept; the prefix is removed, so a
    workbook name 'n_total' is reachable as result.total.  Each kept name is
    wrapped in a NamedArray and printed as "<name> = <description>".

    workbook    object providing get_named_ranges(), whose items have a
                .name attribute and a str() form that NamedArray can parse
    returns     a Struct with one NamedArray attribute per kept name
    '''
    # NOTE(review): get_named_ranges() is the workbook API this code was
    # written against; newer openpyxl releases may expose defined names
    # differently -- confirm against the installed version.
    named_arrays = {}
    for named_range in workbook.get_named_ranges():
        name = named_range.name
        if name.startswith('n_'):
            # only store the names beginning with 'n_'
            # Use the workbook that was passed in, not a module-level global.
            named_arrays[name[2:]] = NamedArray(workbook, str(named_range))
    for key, array in named_arrays.items():
        print(key, '=', array)
    return Struct(**named_arrays)

# ------------------
# program example
# -----------------        

# Open the example workbook.  data_only=True asks openpyxl for the stored
# cell values rather than the formulas (see the openpyxl load_workbook docs).
wb = load_workbook('test.xlsm', data_only=True) 

# Collect the 'n_'-prefixed names of the workbook into one structure.
n = get_names(wb)
# Assumes the workbook defines a name 'n_my_name' (get_names strips the 'n_'
# prefix); prints the value of the cell that name points at.
print(n.my_name.value())

一个小优化是:我给所有想要导入的名称都加上了 'n_' 前缀,这样就可以忽略 Excel 内置的名称。希望这对别人有所帮助。