Question

这是我正在研究的spreadsheet。如您所见，电子表格处于非常混乱的状态。我已经按照以下说明对数据进行了一些清理：

每个列标题均表示当天的期间/课程
每天有7节课/课，因此星期一至星期五= 35列
每个单元格包含班级描述符（前3个字符）、老师的姓名（“$”符号后的3个字符）和房间名称（“（”后的3个字符）。

我想将老师分为3个小组，并符合以下条件：

在每周的任何给定期间/课程中，有2位教师“空闲”（不上课），另有1位教师在上课。
在同一个三人组中，在另一个课时里，换成另一位教师在上课（“非空闲”），而其余两位教师空闲。
此外，在其他某个课时里，剩下的那一种两人组合“空闲”（不上课），而余下的那位教师在上课。

请参见以下思路进行进一步说明：

设三位教师的首字母缩写为 A、B、C：在某一课时的空闲集合中，A 和 B 在集合中，但 C 不在；集合中还包括 C 和 A，但不包括 B；集合中还包括 B 和 C，但不包括 A。这三个条件必须同时成立，才能确定最终的三人组。

因此，在数千种排列中，应该没有多少符合该条件的组合

简单来说，我要寻找的是将老师分成3人一组。在这样的分组中，可以有2位老师进入并观察另一位老师的课程，即在某个时期，2位老师是空闲的，另外1位老师在教学。在每一列中，您将看到在任何给定的日期和时间进行教学的所有教师。因此，我们可以推断，不在该列中的任何人都没有在教学。我们希望三人一组作为三合会保留下来，以便每个人都能被观察到。因此，在一周中的其他时间段内，同一三合会中的另一位老师在教书，而其余两人不在教书。

这是我到目前为止编写的用于清理数据并创建可能的三合会的代码。我不知道这是否是解决上述问题的最佳方法，但是无论如何，这是我到目前为止所做的。目前，我一直在寻找所有这些三合会之间的交集，以便正确识别符合上述条件的老师。

import pandas as pd
import numpy as np
import itertools

class unique_element:
    """A distinct value together with how many copies of it are available.

    Instances are mutable counters: ``perm_unique_helper`` decrements and
    restores ``occurrences`` while it builds permutations.
    """

    def __init__(self, value, occurrences):
        # value: the distinct item; occurrences: copies still unused.
        self.value, self.occurrences = value, occurrences


def perm_unique(elements):
    """Return a generator over the distinct permutations of ``elements``.

    Repeated values are treated as interchangeable, so each distinct
    ordering is produced exactly once (as a tuple).

    Args:
        elements: any iterable of hashable values; it is consumed once.

    Returns:
        The generator created by ``perm_unique_helper``.
    """
    from collections import Counter

    # Materialise once so generators and other one-shot iterables work too.
    elements = list(elements)
    # A single counting pass replaces calling ``elements.count`` for every
    # distinct value; Counter also keeps first-occurrence order, so the
    # permutation order no longer depends on set iteration order.
    counts = Counter(elements)
    listunique = [unique_element(value, count) for value, count in counts.items()]
    u = len(elements)
    return perm_unique_helper(listunique, [0] * u, u - 1)


def perm_unique_helper(listunique, result_list, d):
    """Recursively fill ``result_list`` from index ``d`` down to 0.

    Args:
        listunique: objects exposing ``value`` and a mutable ``occurrences``
            counter; the counters are decremented while a value is in use
            and restored afterwards.
        result_list: scratch buffer that is overwritten in place.
        d: index of the slot to fill next; a negative value means the
            buffer is complete.

    Yields:
        A tuple snapshot of ``result_list`` for every complete arrangement.
    """
    # Every slot has been filled: emit a snapshot of the buffer.
    if d < 0:
        yield tuple(result_list)
        return

    for candidate in listunique:
        # Skip values whose copies are all placed already.
        if candidate.occurrences <= 0:
            continue
        result_list[d] = candidate.value
        candidate.occurrences -= 1
        yield from perm_unique_helper(listunique, result_list, d - 1)
        # Backtrack so the copy is available to the next branch.
        candidate.occurrences += 1


def findsubsets(S, m):
    """Return the set of all ``m``-element combinations of ``S`` as tuples."""
    subsets = set()
    subsets.update(itertools.combinations(S, m))
    return subsets


# Load the raw timetable export.
csv_file = pd.read_csv('Whole_School_TT.csv')
# Drop rows that are empty in every column, then replace the remaining gaps
# with 0. fillna has to run on the dropna result; applying it to csv_file
# again would silently discard the row filtering.
df = csv_file.dropna(how='all')
df = df.fillna(0)
cols = df.columns
# Three working copies with the same shape as df; the parsing loop below
# overwrites their cells with the class name, teacher code(s) and room.
df_class_name = df.copy()
df_names = df.copy()
df_room_number = df.copy()

# Split every timetable cell into class name, teacher code(s) and room.
# Teacher codes are the 3 characters after each '$'; the room is the text
# between '#' and the final character of the cell.
# NOTE(review): the slice offsets (index_dollar - 1, index_hash - 2, dropping
# the last character) assume a fixed cell layout -- confirm against the data.
for col in range(len(cols)):
    for row in range(len(df)):
        cell = df.iat[row, col]
        # Empty cells hold the numeric 0 placeholder; only text can be parsed.
        if not isinstance(cell, str):
            continue

        index_dollar = cell.find('$')
        r_index_dollar = cell.rfind('$')
        index_hash = cell.find('#')
        # Without a '#' there is no room information; store 0 like the other
        # "missing" cells instead of slicing from a -1 index.
        room = cell[index_hash + 1:-1] if index_hash != -1 else 0

        if index_dollar != -1:
            first_name = cell[index_dollar + 1:index_dollar + 4]
            if index_dollar == r_index_dollar:
                names = first_name
            else:
                # Two '$' markers: the lesson is shared by two teachers.
                names = first_name + ' ' + cell[r_index_dollar + 1:r_index_dollar + 4]
            class_name = cell[:(index_dollar - 1)]
        else:
            names = 0
            if index_hash == -1:
                class_name = cell[:3]
            else:
                class_name = cell[:(index_hash - 2)]

        # Positional scalar assignment writes into the frames themselves;
        # chained ``frame[col].iloc[row] = ...`` may write to a temporary copy.
        df_names.iat[row, col] = names
        df_class_name.iat[row, col] = class_name
        df_room_number.iat[row, col] = room

# Gather every distinct entry that appears anywhere in the teacher-name frame.
teacher_names = []
for col in range(len(cols)):
    teacher_names.extend(df_names[cols[col]].unique())

df_all_names = pd.DataFrame(teacher_names, columns=['Names'])
df_all_names = pd.DataFrame(df_all_names['Names'].unique())
# Remove the 0 placeholder used for cells without a teacher.
df_all_names = df_all_names[df_all_names[0] != 0]
mask = (df_all_names[0].str.len() == 3)
df_single_names = df_all_names.loc[mask]  # entries holding exactly one 3-character teacher code

# All unique teacher codes.
set_of_names = set(df_single_names[0])
# Shared lessons are stored as "AAA BBB". Split those entries so a teacher
# who only ever co-teaches is still part of the full teacher set.
for shared_entry in df_all_names.loc[~mask, 0]:
    if isinstance(shared_entry, str):
        set_of_names.update(code for code in shared_entry.split(' ') if len(code) == 3)

period_set_names = [0] * len(cols)
period_set_names_NO_teach = [0] * len(cols)

# For every period, split the known teacher codes into those who teach in
# that period and those who do not.
for col in range(len(cols)):
    teaching_now = set()
    for entry in df_names[cols[col]]:
        teaching_now.add(entry)
        if isinstance(entry, str):
            # A shared lesson is stored as "AAA BBB"; both teachers are
            # teaching, so each code must be counted on its own.
            teaching_now.update(entry.split(' '))
    period_set_names_NO_teach[col] = set_of_names.difference(teaching_now)
    period_set_names[col] = set_of_names.intersection(teaching_now)
    # sanity check
    print('Teachers who teach and teacher who dont teach should be equivalent to the full list of names: ', end='')
    print(period_set_names_NO_teach[col].union(period_set_names[col]) == set_of_names)

def get_current_period_triplets(col):
    """Return the candidate triplets for the period at column index ``col``.

    A candidate consists of two teachers who are free in that period plus
    one teacher who is teaching in it.

    Args:
        col: positional index into ``period_set_names`` and
            ``period_set_names_NO_teach``.

    Returns:
        A set of 3-tuples ``(free_a, free_b, teaching)``. The two free
        teachers are sorted, so the same pair always yields the same tuple
        and results from different periods can be compared or intersected.
    """
    free_period_pairs = findsubsets(period_set_names_NO_teach[col], 2)
    teaching = period_set_names[col]
    return {
        tuple(sorted(free_pair)) + (teacher,)
        for free_pair in free_period_pairs
        for teacher in teaching
    }


# Print the candidate triplets of each period, one period per line.
for col in range(len(cols)):
    current_triplets = get_current_period_triplets(col)
    print(current_triplets)

发现具有特定条件的排列三元组的可能组合

0 个答案: