使用递归或时间戳错误进行最佳假期搜索

时间:2018-04-30 12:53:50

标签: python pandas recursion calendar timestamp

我正在玩一种算法,以便在一年的日历中最佳地插入总共30天的三个休假期。

有时它可以工作(我还没有迭代过最佳),有时我会收到错误:ValueError: cannot set a Timestamp with a non-timestamp

我无法理解为什么它会偶尔起作用,但在其他时候它并没有。

完整代码如下。只需复制并运行。

''' Program to find three periods of vacation of varying length that does  not start in a weekend or holiday'''

from datetime import date
from random import randint, choice
import pandas as pd
from pandas.tseries.holiday import Holiday, AbstractHolidayCalendar


class Brazil(AbstractHolidayCalendar):
    # Brazilian official holidays for 2018
    rules = [Holiday('Universal', year=2018, month=1, day=1), Holiday('Carnaval', year=2018, month=2, day=12),
         Holiday('Carnaval', year=2018, month=2, day=13), Holiday('Carnaval', year=2018, month=2, day=14),
         Holiday('Paixão', year=2018, month=3, day=30), Holiday('Tiradentes', year=2018, month=4, day=21),
         Holiday('Trabalho', year=2018, month=5, day=1), Holiday('Christi', year=2018, month=5, day=31),
         Holiday('Independência', year=2018, month=9, day=7), Holiday('Aparecida', year=2018, month=10, day=12),
         Holiday('Servidor', year=2018, month=10, day=28), Holiday('Finados', year=2018, month=11, day=2),
         Holiday('República', year=2018, month=11, day=15), Holiday('Natal', year=2018, month=12, day=25)]

class Working:
    # Initiates a calendar for 2018 as a pandas DataFrame. Includes holidays and weekends as 'not_working' = True
    def __init__(self, start, end):
        self.cal = Brazil()
        self.df = pd.DataFrame()
        self.df['date'] = pd.date_range(start=start, end=end)
        self.holidays = self.cal.holidays(start=start, end=end)
        self.df['not_working'] = self.df.date.apply(lambda x: x.dayofweek == 5 or x.dayofweek == 6
                                                          or x in self.holidays)

    def total_working_days(self):
        # Calculates total working days
        print('Total working days this configuration is {} days'.format(sum(self.df['not_working'] == False)))

    def apply_vacation(self, d, offset=0):
        # Turns sequential number of days of 'not_working' into True
        self.df.loc[self.df['date'] == d, 'not_working'] = True
        if offset != 0:
            for i in range(offset):
                self.df.loc[self.df['date'] == (d + pd.Timedelta(days=(i + 1))), 'not_working'] = True

    def find_starting_day(self):
        # Finds a random 'not_working' day set to False which means a good day to start the vacation.
        selected = choice(self.df['date']).date()
        if self.df.loc[self.df['date'] == selected, 'not_working'].iloc[0] == True:
            self.find_starting_day()
        else:
            print('Selected day for this vacation period start is: {}'.format(selected))
            return selected

    def schedule(self, pack):
        # Distributes 30 days of vacation within three different periods
        for each in pack:
            d = self.find_starting_day()
            self.apply_vacation(d, each)


def three_periods():
    # Selects three periods that add up to 30 days
    a = randint(1, 28)
    b = randint(1, 29 - a)
    c = 30 - a - b
    print('Division of days is {}, {} and {}'.format(a, b, c))
    return a, b, c

if "__main__" == __name__:
    first_day = date(2018, 1, 1)
    final_day = date(2018, 12, 31)
    leisure = Working(first_day, final_day)
    a, b, c = three_periods()
    leisure.schedule((a, b, c))
    leisure.total_working_days()

0 个答案:

没有答案