python从xlsx读取不同类型的数字

时间:2018-05-30 11:29:44

标签: python excel openpyxl

我用 Python 读取下面的 xlsx 文件。当我对位于另一个目录中的类似 xlsx 文件再次运行这段代码时,我注意到读取到的数字类型发生了变化。我尝试过把两个 xlsx 中的单元格设置成相同的格式,但似乎不起作用。

在第一个 xlsx 中,B1 的值是 long 类型,B15 的值是 float 类型;但在第二个 xlsx 中,它们都被读取为 numpy.float64。

import numbers
import string as st
from string import ascii_uppercase # allows creation of Excel "A1" reference

import numpy as np
import pandas as pd
import xlrd
from openpyxl import load_workbook 

# Workbook to analyse.  The commented-out path is the second, similar
# workbook that the script is also run against.
#address_1='C:/Users/user/Desktop/target/new version/xlsx/new colour.xlsx'
address_1 = 'C:/Users/user/Desktop/target/new/xlsx/colour.xlsx'

# The same file is opened twice: data_only=False exposes the formulas as
# written, data_only=True exposes the values Excel cached for them.
book_formula = load_workbook(address_1, data_only=False)
book = load_workbook(address_1, data_only=True)

# Column letters "A".."Z" followed by "AA".."ZZ", indexed by 0-based column.
l = list(ascii_uppercase) + [letter1 + letter2 for letter1 in ascii_uppercase for letter2 in ascii_uppercase]

# Worksheet names.  `sheetnames` replaces the deprecated get_sheet_names()
# method and returns the same list.
sheets_formula = book_formula.sheetnames

name = []           # "=<sheet>!" prefixes, one per worksheet
ref_equal_dup = []  # "=A1"-style references; contains duplicates across worksheets
ref_cell_dup = []   # "A1"-style references; contains duplicates across worksheets
index_1 = 0

def equal():
    """Record the '='-prefixed spellings of the current cell's address.

    Reads the module-level ``col`` and ``row`` (both 0-based) and the
    column-letter table ``l``, then appends four strings to
    ``ref_equal_dup``: relative, row-absolute, fully absolute and
    column-absolute, in that order.
    """
    column_letter = l[col]
    row_number = str(row + 1)  # Excel rows are 1-based
    ref_equal_dup.extend([
        '=' + column_letter + row_number,
        '=' + column_letter + '$' + row_number,
        '=$' + column_letter + '$' + row_number,
        '=$' + column_letter + row_number,
    ])

def cell():
    """Record the bare (no '=') spellings of the current cell's address.

    Reads the module-level ``col`` and ``row`` (both 0-based) and the
    column-letter table ``l``, then appends four strings to
    ``ref_cell_dup``: relative, row-absolute, fully absolute and
    column-absolute, in that order.
    """
    column_letter = l[col]
    row_number = str(row + 1)  # Excel rows are 1-based
    ref_cell_dup.extend([
        column_letter + row_number,
        column_letter + '$' + row_number,
        '$' + column_letter + '$' + row_number,
        '$' + column_letter + row_number,
    ])

# First pass: for every worksheet, remember its "=<sheet>!" prefix and
# generate every possible reference spelling for each cell in its used range.
for sheet_name in sheets_formula:
    # Plain concatenation instead of str(): str() on a non-ASCII unicode
    # sheet name raises UnicodeEncodeError under Python 2.
    name.append('=' + sheet_name + '!')

    df = pd.DataFrame(book_formula[sheet_name].values)

    # range() already stops one short of its argument, so the full lengths
    # are used here; subtracting 1 skipped the last row and the last column.
    rows, cols = len(df.index), len(df.columns)

    # `col` and `row` must stay module-level names: equal() and cell()
    # read them as globals.
    for col in range(cols):
        for row in range(rows):
            equal()
            cell()

# Remove the duplicates from ref_cell_dup and ref_equal_dup.
# reshape(-1, 1) lets numpy infer the row count; the previous
# `len(...)/1` produced a float under true division, which reshape rejects.
ref_equal_dup_table = pd.DataFrame(np.array(ref_equal_dup).reshape(-1, 1), columns=['Cell'])

ref_cell_dup_table = pd.DataFrame(np.array(ref_cell_dup).reshape(-1, 1), columns=['Cell'])

# Drop duplicates, keeping the first occurrence so the order is preserved.
ref_cell_flat = ref_cell_dup_table.drop_duplicates(keep='first')
ref_equal_flat = ref_equal_dup_table.drop_duplicates(keep='first')

# Back to plain flat lists for the membership tests done later.
ref_cell = list(ref_cell_flat.values.flatten())
ref_equal = list(ref_equal_flat.values.flatten())

# Build every "=<sheet>!<cell>" spelling: each worksheet prefix combined
# with each de-duplicated cell reference.
wrk_cell = [sheet_prefix + cell_ref for sheet_prefix in name for cell_ref in ref_cell]

# `sheetnames` replaces the deprecated get_sheet_names() method.
sheets_formula = book_formula.sheetnames

# State for the second pass, which records each cell's value or formula.
index = 0

formula = []

try:
    _TEXT_TYPE = unicode  # Python 2: text cells arrive as ``unicode``
except NameError:
    _TEXT_TYPE = str  # Python 3: ``str`` is the text type


def _as_text(value):
    """Return *value* unchanged if it is already text, otherwise ``str(value)``."""
    return value if isinstance(value, _TEXT_TYPE) else str(value)


def if_statements():
    """Classify the current cell and append one row to ``formula``.

    Works on module-level state: ``thecell`` (cell read with formulas),
    ``thecell_0`` (same cell read with cached values), ``row``/``col``
    (0-based position), ``index`` (position in ``sheets_formula``), the
    column-letter table ``l`` and the reference lists ``wrk_cell`` and
    ``ref_equal``.

    Each appended row is ``[action, sheet_name, cell_address, value]``:

    * formula text differs from the cached value and is a known reference
      spelling -> ``'Cell Reference'``, value is the text without its
      leading character;
    * formula text differs and is not a known reference -> ``'Formula'``,
      value is the text without its leading character;
    * identical text -> ``'u'`` with the text itself;
    * identical real number of any flavour (``int``, ``long``, ``float``,
      numpy scalar) -> ``float`` with ``float(value)``, so the row does
      not depend on which numeric type the reader happened to produce;
    * anything else identical -> ``type(value)`` with ``str(value)``.
    """
    sheet_name = sheets_formula[index].encode('utf-8')
    address = l[col] + str(row + 1)
    cell_text = _as_text(thecell)

    # Compared as text so that two NaN cells count as "the same".
    if cell_text != _as_text(thecell_0):
        # Exact membership.  The previous substring test against
        # str(wrk_cell + ref_equal) rebuilt a huge string per cell, matched
        # arbitrary fragments of it and raised TypeError for non-strings.
        if thecell in wrk_cell or thecell in ref_equal:
            formula.append(['Cell Reference', sheet_name, address, cell_text[1:]])
        elif thecell is not None and thecell != 'nan':
            formula.append(['Formula', sheet_name, address, cell_text[1:]])

    elif thecell == thecell_0:
        if isinstance(thecell, _TEXT_TYPE):
            formula.append(['u', sheet_name, address, thecell])

        # bool is an int subclass; keep it out of the numeric branch.
        elif isinstance(thecell, numbers.Real) and not isinstance(thecell, bool):
            formula.append([float, sheet_name, address, float(thecell)])

        else:
            formula.append([type(thecell), sheet_name, address, str(thecell)])


# Second pass: walk every cell of every worksheet, reading it once with
# formulas (df) and once with cached values (df_0), and let if_statements()
# record it.  `index`, `row`, `col`, `thecell` and `thecell_0` must stay
# module-level names because if_statements() reads them as globals.
for index, sheet_name in enumerate(sheets_formula):
    df = pd.DataFrame(book_formula[sheet_name].values)

    df_0 = pd.DataFrame(book[sheet_name].values)

    rows, cols = len(df.index), len(df.columns)

    for row in range(rows):
        for col in range(cols):

            thecell = df.iloc[row, col]

            thecell_0 = df_0.iloc[row, col]

            if thecell is not None:
                if_statements()

# One row per recorded cell.  reshape(-1, 4) lets numpy infer the row count;
# the previous `len(formula)-1/4` parsed as `len(formula) - (1/4)` and only
# worked because 1/4 == 0 under Python 2 integer division.
new_version = pd.DataFrame(
    np.array(formula).reshape(-1, 4),
    columns=['ACTION', 'SHEET_NAME', 'CELL_ADDRESS', 'CELL_VALUE']
)

从python输出 enter image description here

xlsx格式

enter image description here

这样做的目的是比较数据集并把结果保存下来;如果之后出现新版本,我想把旧版本和新版本进行比较。这个问题是由 numpy 库引起的吗?

0 个答案:

没有答案