我用 Python 读取下面的 xlsx 文件。当我对位于另一个目录中的类似 xlsx 文件再次运行这段代码时,我注意到数值的类型发生了变化。我尝试在 xlsx 中设置单元格格式,使两个文件保持一致,但似乎不起作用。
在第一个 xlsx 中,B1 的值类型是 long,B15 的值类型是 float;但在第二个 xlsx 中,这两个值的类型都变成了 numpy.float64。
from openpyxl import load_workbook
import pandas as pd
import xlrd
import string as st
from string import ascii_uppercase # allows creation of Excel "A1" reference
import numpy as np
# Alternative input kept for reference:
# address_1 = 'C:/Users/user/Desktop/target/new version/xlsx/new colour.xlsx'
address_1 = 'C:/Users/user/Desktop/target/new/xlsx/colour.xlsx'

# The same file is opened twice: data_only=False exposes the formula text of
# each cell, data_only=True exposes the value cached when the file was saved.
book_formula = load_workbook(address_1, data_only=False)
book = load_workbook(address_1, data_only=True)

# Column letters 'A'..'Z' followed by 'AA'..'ZZ' (702 entries), indexed by
# zero-based column number.
l = list(ascii_uppercase) + [letter1 + letter2
                             for letter1 in ascii_uppercase
                             for letter2 in ascii_uppercase]

# Sheet names of the workbook; `sheetnames` replaces the deprecated
# `get_sheet_names()` accessor.
sheets_formula = book_formula.sheetnames

name = []           # '=<sheet>!' prefixes, one per worksheet
ref_equal_dup = []  # '=A1'-style references, may contain duplicates
ref_cell_dup = []   # 'A1'-style references, may contain duplicates
index_1 = 0         # position of the worksheet currently being walked
def equal():
    """Record the four '='-prefixed spellings of the current cell address.

    Reads the module-level ``col`` and ``row`` loop indices and the column
    letter table ``l``, and appends, in order, the relative, row-absolute,
    fully absolute and column-absolute forms (e.g. ``=A1``, ``=A$1``,
    ``=$A$1``, ``=$A1``) to ``ref_equal_dup``.
    """
    column_letter = l[col]
    row_number = str(row + 1)  # row is zero-based, spreadsheet rows are one-based
    for col_anchor, row_anchor in (('', ''), ('', '$'), ('$', '$'), ('$', '')):
        ref_equal_dup.append(
            '=' + col_anchor + column_letter + row_anchor + row_number)
def cell():
    """Record the four bare spellings of the current cell address.

    Reads the module-level ``col`` and ``row`` loop indices and the column
    letter table ``l``, and appends, in order, the relative, row-absolute,
    fully absolute and column-absolute forms (e.g. ``A1``, ``A$1``,
    ``$A$1``, ``$A1``) to ``ref_cell_dup``.
    """
    column_letter = l[col]
    row_number = str(row + 1)  # row is zero-based, spreadsheet rows are one-based
    for col_anchor, row_anchor in (('', ''), ('', '$'), ('$', '$'), ('$', '')):
        ref_cell_dup.append(col_anchor + column_letter + row_anchor + row_number)
# Walk every worksheet and collect (a) its '=<sheet>!' prefix and (b) every
# spelling of every cell address inside the sheet's used range. equal() and
# cell() read the loop variables `col` and `row` from module scope.
while index_1 < len(sheets_formula):
    name.append('=' + str(sheets_formula[index_1]) + '!')
    df = pd.DataFrame(book_formula[sheets_formula[index_1]].values)
    # Use the full extent of the sheet. The earlier "len(...) - 1" combined
    # with range() skipped the last row and the last column, so references to
    # those cells were never generated.
    rows, cols = len(df.index), len(df.columns)
    for col in range(cols):
        for row in range(rows):
            equal()
            cell()
    index_1 = index_1 + 1
# Remove duplicates from ref_cell_dup and ref_equal_dup by loading each list
# into a one-column table. reshape(-1, 1) lets numpy work out the row count;
# the earlier "len(...)/1" produced a float under true division, which
# reshape() rejects.
ref_equal_dup_table = pd.DataFrame(
    np.array(ref_equal_dup).reshape(-1, 1), columns=['Cell'])
ref_cell_dup_table = pd.DataFrame(
    np.array(ref_cell_dup).reshape(-1, 1), columns=['Cell'])

# Drop duplicates, keeping the first occurrence so the original order survives.
ref_cell_flat = ref_cell_dup_table.drop_duplicates(keep='first')
ref_equal_flat = ref_equal_dup_table.drop_duplicates(keep='first')
ref_cell = list(ref_cell_flat.values.flatten())
ref_equal = list(ref_equal_flat.values.flatten())

# Every '=<sheet>!<cell>' combination, i.e. references that name a worksheet.
wrk_cell = [x + y for x in name for y in ref_cell]

# `sheetnames` replaces the deprecated `get_sheet_names()` accessor.
sheets_formula = book_formula.sheetnames

# State for the comparison pass: current worksheet position and result rows.
index = 0
formula = []
def if_statements():
    """Classify the current cell and append one result row to ``formula``.

    Works entirely on module-level state: ``thecell`` (content from the
    formula view), ``thecell_0`` (content from the cached-value view), the
    position ``index``/``row``/``col``, the lookup lists ``wrk_cell`` and
    ``ref_equal``, and the output list ``formula``.

    Each appended row is ``[action, sheet name (UTF-8 encoded), address,
    value]``:

    * When the two views differ as text, the cell holds a formula. It is
      recorded as 'Cell Reference' if its text is exactly one of the known
      references, otherwise as 'Formula'; the leading '=' is stripped.
    * When the two views are equal, the cell holds a constant. Text is
      recorded with action 'u'; a ``long`` is recorded with its type and
      converted to float; anything else is recorded with its type and its
      string form.

    Note: ``unicode`` and ``long`` are Python 2 builtins.
    """
    sheet_name = sheets_formula[index].encode('utf-8')
    address = l[col] + str(row + 1)  # row is zero-based

    if str(thecell) != str(thecell_0):
        # Exact membership. The previous test searched inside str(list),
        # which is a substring match: '=A1' matched because '=A10' existed,
        # a non-string value raised TypeError, and the same cell could then
        # also be recorded as 'Formula'.
        if thecell in wrk_cell or thecell in ref_equal:
            formula.append(
                ['Cell Reference', sheet_name, address, str(thecell)[1:]])
        elif thecell is not None and thecell != 'nan':
            formula.append(['Formula', sheet_name, address, str(thecell)[1:]])
    elif thecell == thecell_0:
        cell_type = type(thecell)
        if cell_type == unicode:
            formula.append(['u', sheet_name, address, thecell])
        elif cell_type == long:
            formula.append([cell_type, sheet_name, address, float(thecell)])
        else:
            formula.append([cell_type, sheet_name, address, str(thecell)])
# Compare, sheet by sheet and cell by cell, the formula view of the workbook
# with its cached-value view. if_statements() reads `thecell`, `thecell_0`,
# `row`, `col` and `index` from module scope and appends to `formula`.
while index < len(sheets_formula):
    # NOTE(review): pandas infers a dtype per column when it builds these
    # frames, so a column holding only numbers presumably hands back numpy
    # scalars (e.g. numpy.float64) from .iloc, whereas a column mixing text
    # and numbers keeps the plain Python objects. That would explain cell
    # types differing between two similar workbooks - confirm.
    df = pd.DataFrame(book_formula[sheets_formula[index]].values)
    df_0 = pd.DataFrame(book[sheets_formula[index]].values)
    rows, cols = len(df.index), len(df.columns)
    for row in range(rows):
        for col in range(cols):
            thecell = df.iloc[row, col]
            thecell_0 = df_0.iloc[row, col]
            if thecell is not None:
                if_statements()
    index = index + 1

# One row per classified cell. reshape(-1, 4) lets numpy derive the row count;
# the earlier "len(formula)-1/4" only worked because 1/4 truncates to 0 under
# Python 2 integer division, and yields a float (rejected by reshape) under
# true division.
new_version = pd.DataFrame(
    np.array(formula).reshape(-1, 4),
    columns=['ACTION', 'SHEET_NAME', 'CELL_ADDRESS', 'CELL_VALUE'])
xlsx格式
这样做的目的是对数据集进行比较并把结果保存下来;当出现新版本时,我想比较旧版本和新版本。这个问题是由 numpy 库引起的吗?