需要一个建议
我的代码:
import pandas as pd
import openpyxl
# Load the source rows; 'column 1' is the column searched with .str.contains below.
df = pd.read_excel('workbook.xlsx', sheet_name='Sheet1')

# Open the existing workbook in append mode so its current sheets are kept.
# The context manager saves and closes the file on exit, so no explicit
# save()/close() call is needed. if_sheet_exists='replace' (pandas >= 1.3)
# lets the script be re-run without failing on sheets it created earlier.
with pd.ExcelWriter(
    'workbook.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as df_writer:
    for string in ['A', 'B', 'C', 'D', 'E', 'F']:
        # na=False treats empty cells as "no match"; without it the mask
        # contains NaN and boolean indexing raises a ValueError.
        matches = df['column 1'].str.contains(string, na=False)
        # One sheet per search string, named after that string.
        df[matches].to_excel(df_writer, sheet_name=string)
现在,我将包含某些字符串的数据分离出来,并保存在同一工作簿的不同工作表中。
每个工作表分别命名为 'A'、'B'、'C'、'D'、'E'、'F'。我需要将名为“A”和“B”的工作表合并在一起,并以不同的名称(例如“Combined”)保存在同一工作簿中,以便进一步分析。工作表“A”和“B”具有相同的列数和标题。
任何建议将不胜感激。
答案 0 :(得分:1)
使用 str.contains 方法并传入模式 'A|B',即可筛选出该列中包含 A 或 B 的行,然后将结果另存为一个新的工作表:
import pandas as pd
import openpyxl
# Load the source rows; 'column 1' is the column searched with .str.contains below.
df = pd.read_excel('workbook.xlsx', sheet_name='Sheet1')

# Open the existing workbook in append mode so its current sheets are kept.
# The context manager saves and closes the file on exit, so no explicit
# save()/close() call is needed. if_sheet_exists='replace' (pandas >= 1.3)
# lets the script be re-run without failing on sheets it created earlier.
with pd.ExcelWriter(
    'workbook.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as df_writer:
    for string in ['A', 'B', 'C', 'D', 'E', 'F']:
        # na=False treats empty cells as "no match"; without it the mask
        # contains NaN and boolean indexing raises a ValueError.
        matches = df['column 1'].str.contains(string, na=False)
        df[matches].to_excel(df_writer, sheet_name=string)

    # 'A|B' is a regular expression: rows whose value contains A or B.
    # Written once, after the per-letter sheets.
    combined = df['column 1'].str.contains('A|B', na=False)
    df[combined].to_excel(df_writer, sheet_name='Combined')
# Now your excel contains all 'A','B','C','D','E','F' and 'Combined' sheet.
答案 1 :(得分:0)
以下代码适用于所有工作表列数相同的 .xlsx 文件。不过,也可以对其进行修改,以合并列数不同的工作表中的数据。
注意:如果某个 .xlsx 文件中各工作表的列数不相同,代码会将该文件移动到 redo_files_directory 中。
import numpy as np
import os
from openpyxl import *
import shutil
# Root folder handed to os.walk; every file found beneath it is processed.
# NOTE(review): 'filepath' looks like a placeholder - replace with a real path.
directory = 'filepath'
# Location prefix of the path each merged workbook is saved to.
new_folder = 'New_Files_Directory'
# Destination given to shutil.move for files that are set aside instead of
# merged (e.g. sheets whose column counts differ).
redo_files_directory = 'Folder_for_files_with_different_num_of_columns'
def checkEqual1(iterator):
    """Return True when every item of *iterator* equals the first item.

    Accepts any iterable. An empty iterable counts as all-equal and
    yields True; a single-item iterable is likewise True.
    """
    iterator = iter(iterator)
    try:
        first = next(iterator)
    except StopIteration:
        # Nothing to compare against, so the items are vacuously equal.
        return True
    # Compare each remaining item with the first; stops at the first mismatch.
    return all(first == rest for rest in iterator)
# For every workbook under `directory`, stack the rows of all its sheets into
# a single sheet named 'Sheet_1' and save the result under `new_folder` with
# the original file name. Workbooks whose sheets do not all have the same
# number of columns are moved to `redo_files_directory` and not merged.

# File extensions that openpyxl's load_workbook accepts; anything else in the
# tree (lock files, CSVs, ...) is skipped rather than crashing the run.
_EXCEL_SUFFIXES = ('.xlsx', '.xlsm', '.xltx', '.xltm')

# Both folders must exist: wb.save needs the output folder, and shutil.move
# would otherwise rename the file to the folder's name instead of moving it
# into the folder.
os.makedirs(new_folder, exist_ok=True)
os.makedirs(redo_files_directory, exist_ok=True)

for root, dirs, files in os.walk(directory):
    for filename in files:
        if not filename.lower().endswith(_EXCEL_SUFFIXES):
            continue

        file = os.path.join(root, filename)
        wb2 = load_workbook(file)

        # Column count of each sheet, used to decide whether the sheets can
        # be stacked on top of one another.
        column_counts = [worksheet.max_column for worksheet in wb2.worksheets]

        if not column_counts or not checkEqual1(column_counts):
            # No sheets, or sheets of differing width: set the file aside and
            # do not write an (empty) merged workbook for it. The default
            # copy function is used because the source is a regular file.
            shutil.move(file, redo_files_directory)
            continue

        wb = Workbook()
        ws = wb.active
        ws.title = 'Sheet_1'

        # z is the next free row in the merged sheet; it keeps counting
        # across source sheets so each sheet is appended below the previous.
        z = 1
        for sheet in wb2.worksheets:
            for r in range(1, sheet.max_row + 1):
                for c in range(1, sheet.max_column + 1):
                    ws.cell(row=z, column=c).value = sheet.cell(row=r, column=c).value
                z += 1

        # os.path.join inserts the separator that plain concatenation omits.
        wb.save(os.path.join(new_folder, filename))