这是我正在处理的电子表格。如您所见,该电子表格目前非常混乱。我已经按照下面的说明对数据做了一些清理:
我想把老师按每3人一组进行分组,并满足以下条件:
请参见以下思路进行进一步说明:
姓名缩写为 A 和 B 的老师在某个集合中,但 C 不在该集合中;另一个集合中包含 C 和 A,但不包含 B;还有一个集合中包含 B 和 C,但不包含 A。这三个条件必须同时成立,才能确定最终的三人小组。
因此,在数千种排列中,应该没有多少符合该条件的组合
简单来说,我想把老师每3人分成一组。在这样的小组中,可以有2位老师走进课堂观摩另一位老师的课,也就是说,在某个时间段内,2位老师空闲,1位老师在授课。在每一列中,您会看到在给定日期和时间段内正在授课的所有教师。因此,我们可以推断,不在该列中的任何人当时都没有授课。 我们希望这三人始终保持为同一个三人小组,以便每个人都能被观摩到。因此,在一周中的其他某个时间段内,应由同一小组中的另一位老师授课,而其余两位老师没有课。
这是我到目前为止编写的代码,用于清理数据并生成可能的三人小组。我不知道这是否是解决上述问题的最佳方法,但无论如何,这就是我目前所做的工作。目前,我正在设法找出所有这些三人小组之间的交集,以便正确识别出符合上述条件的老师。
import pandas as pd
import numpy as np
import itertools
class unique_element:
    """Pair a value with a count of how many times it occurs.

    Attributes are plain and mutable: ``value`` holds the element and
    ``occurrences`` holds its count.
    """

    def __init__(self, value, occurrences):
        self.value, self.occurrences = value, occurrences
def perm_unique(elements):
    """Return a generator over the distinct permutations of *elements*.

    Equal elements are treated as interchangeable, so each distinct
    arrangement is produced once.

    Args:
        elements: Any iterable of hashable items (a list, tuple, string or
            generator all work).

    Returns:
        The generator created by ``perm_unique_helper``; it yields tuples
        whose length equals the number of input items.
    """
    # Local import so this function does not depend on the file's import block.
    from collections import Counter

    # One counting pass replaces a separate ``elements.count`` scan per
    # distinct value, and Counter keeps first-seen order, so the permutation
    # order no longer depends on set iteration order.
    counts = Counter(elements)
    listunique = [unique_element(value, count) for value, count in counts.items()]
    u = sum(counts.values())
    return perm_unique_helper(listunique, [0] * u, u - 1)
def perm_unique_helper(listunique, result_list, d):
    """Recursively yield every distinct way to fill ``result_list[0..d]``.

    Args:
        listunique: Objects exposing ``value`` and a mutable integer
            ``occurrences``. The counts are decremented while an item is in
            use and restored after the recursive call returns.
        result_list: Scratch list that is overwritten in place; slot ``d`` is
            filled first, then ``d - 1``, down to slot 0.
        d: Index of the slot to fill next. A negative value means every slot
            is filled.

    Yields:
        A tuple snapshot of ``result_list`` for each completed arrangement.
    """
    if d < 0:
        # All slots are filled: emit a copy, because the list keeps mutating.
        yield tuple(result_list)
        return
    for item in listunique:
        if item.occurrences <= 0:
            continue
        result_list[d] = item.value
        item.occurrences -= 1
        yield from perm_unique_helper(listunique, result_list, d - 1)
        # Give the item back so sibling branches can use it again.
        item.occurrences += 1
def findsubsets(S, m):
    """Return the set of all ``m``-element combinations of ``S`` as tuples."""
    subsets = set()
    for combo in itertools.combinations(S, m):
        subsets.add(combo)
    return subsets
# Load the raw timetable export.
csv_file = pd.read_csv('Whole_School_TT.csv')
# Drop rows that are empty in every column, then mark the remaining empty
# cells with 0. The fill must be applied to the result of dropna(); filling
# `csv_file` again would silently discard the row filtering.
df = csv_file.dropna(how='all')
df = df.fillna(0)
cols = df.columns
# Independent copies with the same shape as `df`; the parsing loop below
# overwrites their cells with the class name, teacher name(s) and room number.
df_class_name = df.copy()
df_names = df.copy()
df_room_number = df.copy()
# Split every timetable cell into class name, teacher name(s) and room number.
# NOTE(review): the cell layout is inferred from the slicing below (teacher
# codes are the three characters after a '$', the room follows a '#') --
# confirm against the CSV.
for col in range(len(cols)):
    for row in range(len(df)):
        cell = df.iloc[row, col]
        if not isinstance(cell, str):
            # Empty cells were filled with 0 and carry nothing to parse; the
            # copies keep that placeholder unchanged.
            continue

        # Integer positions are compared with ==/!=, never `is`: identity of
        # equal ints is an implementation detail.
        index_dollar = cell.find('$')
        r_index_dollar = cell.rfind('$')
        index_hash = cell.find('#')

        # Results are written with a single .iloc[row, col] assignment;
        # chained `frame[col].iloc[row] = ...` may write to a temporary copy.
        if index_dollar != -1:
            name1 = cell[index_dollar + 1:index_dollar + 4]
            if index_dollar == r_index_dollar:
                # Exactly one '$': a single teacher.
                df_names.iloc[row, col] = name1
            else:
                # First and last '$' differ: store both names, space-separated.
                name2 = cell[r_index_dollar + 1:r_index_dollar + 4]
                df_names.iloc[row, col] = name1 + ' ' + name2
            df_class_name.iloc[row, col] = cell[:index_dollar - 1]
            df_room_number.iloc[row, col] = cell[index_hash + 1:-1]
        else:
            # No '$' marker: no teacher is recorded for this cell.
            df_names.iloc[row, col] = 0
            if index_hash == -1:
                df_class_name.iloc[row, col] = cell[:3]
                df_room_number.iloc[row, col] = 0
            else:
                df_class_name.iloc[row, col] = cell[:index_hash - 2]
                df_room_number.iloc[row, col] = cell[index_hash + 1:-1]
# Gather the distinct name entries of every period column into one flat list.
teacher_names = []
for column_label in cols:
    period_names = df_names[column_label].unique()
    teacher_names.extend(period_names)

# De-duplicate across periods; the resulting single column is labelled 0.
names_frame = pd.DataFrame(teacher_names, columns=['Names'])
df_all_names = pd.DataFrame(names_frame['Names'].unique())

# Discard rows whose value equals 0.
is_nonzero = (df_all_names != 0).any(axis=1)
df_all_names = df_all_names[is_nonzero]

# Keep only entries that are exactly three characters long.
mask = df_all_names[0].str.len() == 3
df_single_names = df_all_names.loc[mask]

# The unique names kept above, as a set for the per-period set operations.
set_of_names = set(df_single_names[0])
# For every period (column) work out which of the known teachers are teaching
# and which are not.
period_set_names = [0] * len(cols)
period_set_names_NO_teach = [0] * len(cols)
for col in range(len(cols)):
    # Every name entry listed in this period's column.
    listed_names = set(df_names[cols[col]])
    # A cell with two teachers is stored as both names joined by a space, and
    # that combined string never equals a single teacher name. Add the
    # individual names too, so co-teachers are counted as teaching rather
    # than free.
    for entry in list(listed_names):
        if isinstance(entry, str):
            listed_names.update(entry.split())
    period_set_names[col] = set_of_names.intersection(listed_names)
    period_set_names_NO_teach[col] = set_of_names.difference(listed_names)
    # Sanity check: the two groups together must cover every known name.
    print('Teachers who teach and teacher who dont teach should be equivalent to the full list of names: ', end='')
    print(period_set_names_NO_teach[col].union(period_set_names[col]) == set_of_names)
def get_current_period_triplets(col):
    """Build every (free, free, teaching) triplet for one period.

    Reads the module-level lists ``period_set_names_NO_teach`` and
    ``period_set_names``.

    Args:
        col: Index of the period in those two lists.

    Returns:
        A set of 3-tuples. The first two items are a pair of distinct names
        from the period's free set, in sorted order; the third is a name from
        the period's teaching set. The set is empty when fewer than two names
        are free or nobody is teaching.
    """
    # Sorting gives each free pair one canonical order, so the same pair is
    # spelled identically whichever period it comes from.
    free_teachers = sorted(period_set_names_NO_teach[col])
    teaching_teachers = period_set_names[col]
    return {
        (first, second, teacher)
        for first, second in itertools.combinations(free_teachers, 2)
        for teacher in teaching_teachers
    }
# Print the candidate triplets of every period in turn.
for col, _ in enumerate(cols):
    current_triplets = get_current_period_triplets(col)
    print(current_triplets)