假设:
# Sample input: unsorted datetimes that contain duplicates
# (October 5 and October 6 each appear twice).
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6)
]
预期输出:
# Expected result for the sample input: each run of consecutive days is a
# [first, last] pair, and each isolated day is a bare datetime.
expect = [
    [datetime(2014, 10, 1), datetime(2014, 10, 3)],
    [datetime(2014, 10, 5), datetime(2014, 10, 7)],
    datetime(2014, 10, 9),
    datetime(2014, 10, 11),
    [datetime(2014, 10, 20), datetime(2014, 10, 22)]
]
在Python中实施:
from datetime import datetime, timedelta
def parse_date_ranges(dates):
    """Collapse a collection of datetimes into runs of consecutive days.

    The input may be unsorted and may contain duplicates. Two values are
    treated as consecutive when they are exactly ``timedelta(days=1)`` apart.

    Args:
        dates: A collection (for example a list) of ``datetime`` objects.
            It is not modified.

    Returns:
        ``False`` when ``dates`` is empty or ``None``. Otherwise a list in
        ascending order in which every run of consecutive days is a two-item
        list ``[first, last]`` and every isolated day is the bare value.
    """
    if not dates:
        return False

    one_day = timedelta(days=1)
    result = []

    def flush(first, last):
        # A run of length one is reported as the bare value, not as a pair.
        result.append([first, last] if first != last else first)

    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(dates)
    start = end = ordered[0]
    for date in ordered[1:]:
        if date == end:
            # Duplicates are adjacent after sorting; skip them.
            continue
        if date - end == one_day:
            # Consecutive day: extend the current run.
            end = date
        else:
            # Gap larger than one day: close the current run, start a new one.
            flush(start, end)
            start = end = date
    flush(start, end)
    return result
也可以参考https://gist.github.com/mattes/8987332进行更多测试。
问题
这个实现可以正常工作,但我是Python的新手。所以我想知道是否有更好的方法来解决这个问题?还是说现在这样就可以了?
答案 0 :(得分:7)
我喜欢itertools:
from itertools import tee, zip_longest
from datetime import datetime, timedelta
# Step of one calendar day; the demo below adds it to a date to get its successor.
one_day = timedelta(days=1)
def pairwise(iterable):
    """Pair every item with its successor.

    Yields ``(item, next_item)`` for each item of ``iterable``; the last
    item is paired with ``None`` because it has no successor.
    """
    leading, trailing = tee(iterable, 2)
    # Advance the second copy by one so it runs one step ahead.
    next(trailing, None)
    # zip_longest pads the shorter (trailing) stream with None by default.
    return zip_longest(leading, trailing)
def collapse_ranges(sorted_iterable, inc):
    """Collapse a sorted sequence into runs of successive values.

    Args:
        sorted_iterable: Values in ascending order.
        inc: Callable returning the successor of a value; two neighbours
            belong to the same run when ``inc(left) == right``.

    Yields:
        ``(first, last)`` for every run of two or more successive values,
        and the bare value for every value that stands alone.
    """
    pairs = pairwise(sorted_iterable)
    for first, follower in pairs:
        if inc(first) != follower:
            # No successor follows: the value stands alone.
            yield first
            continue
        # Keep consuming the SAME iterator until the run is broken, so the
        # outer loop resumes right after the end of this run.
        for last, follower in pairs:
            if inc(last) != follower:
                break
        yield first, last
# dates = [...]
numbers = [11, 1, 2, 3, 5, 5, 6, 22, 20, 21, 9, 7, 6]

if __name__ == '__main__':
    import pprint

    # Dates: the successor of a date is the date one day later.
    date_runs = collapse_ranges(sorted(set(dates)), lambda d: d + one_day)
    for entry in date_runs:
        pprint.pprint(entry)

    # Integers: (1).__add__ is the bound method computing 1 + n.
    number_runs = collapse_ranges(sorted(set(numbers)), (1).__add__)
    for entry in number_runs:
        pprint.pprint(entry)
结果:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
datetime.datetime(2014, 10, 9, 0, 0)
datetime.datetime(2014, 10, 11, 0, 0)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
(1, 3)
(5, 7)
9
11
(20, 22)
答案 1 :(得分:3)
您可以改写标题相近的问题“Grouping consecutive dates together”下的这个回答(answer),
使其适用于datetime对象,
从而避免重新发明轮子:
def parse_date_ranges(dates):
    """Group dates into runs of consecutive calendar days.

    Duplicates are dropped and the input is sorted first. Two values are
    consecutive when their ``toordinal()`` day numbers differ by exactly 1.

    Returns:
        ``False`` when ``dates`` is falsy. Otherwise a list where every run
        of two or more days is ``[first, last]`` and every isolated day is
        the bare value.
    """
    def collapse(values):
        remaining = iter(sorted(set(values)))  # de-dup and sort
        first = last = next(remaining)
        for current in remaining:
            if current.toordinal() - last.toordinal() != 1:
                # Gap found: emit the finished run and start a new one.
                yield [first, last] if first != last else first
                first = current
            last = current
        # Emit the run that was still open when the input ended.
        yield [first, last] if first != last else first

    if not dates:
        return False
    return list(collapse(dates))
答案 2 :(得分:1)
我为你写了另一个解决方案,并加了几条注释来解释代码。
from datetime import datetime, timedelta
# Sample input: unsorted datetimes that contain duplicates.
dates = [
    datetime(2014, 10, 11),
    datetime(2014, 10, 1),
    datetime(2014, 10, 2),
    datetime(2014, 10, 3),
    datetime(2014, 10, 5),
    datetime(2014, 10, 5),
    datetime(2014, 10, 6),
    datetime(2014, 10, 22),
    datetime(2014, 10, 20),
    datetime(2014, 10, 21),
    datetime(2014, 10, 9),
    datetime(2014, 10, 7),
    datetime(2014, 10, 6)
]

# Remove duplicates, and sort the dates ascending
sorted_dates = sorted(set(dates))

# Start the first range at the earliest date
first, last = sorted_dates[0], sorted_dates[0]
date_ranges = []

# Loop over the sorted list from the second value
for d in sorted_dates[1:]:
    if d - last != timedelta(days=1):
        # Not exactly one day after "last": close the current range.
        # A single day is stored as a 1-tuple, a real range as (first, last).
        date_ranges.append((first,) if first == last else (first, last))
        first, last = d, d
    else:
        last = d

# Handle the range that is still open after the loop
date_ranges.append((first,) if first == last else (first, last))

for dt_pair in date_ranges:
    # Parenthesised form is valid on Python 3 and prints the same on Python 2.
    print(dt_pair)
输出:
(datetime.datetime(2014, 10, 1, 0, 0), datetime.datetime(2014, 10, 3, 0, 0))
(datetime.datetime(2014, 10, 5, 0, 0), datetime.datetime(2014, 10, 7, 0, 0))
(datetime.datetime(2014, 10, 9, 0, 0),)
(datetime.datetime(2014, 10, 11, 0, 0),)
(datetime.datetime(2014, 10, 20, 0, 0), datetime.datetime(2014, 10, 22, 0, 0))
答案 3 :(得分:1)
改写this answer以支持datetime对象。该实现可以处理含重复项和未排序的输入,并且兼容python3:
utf8
示例:
import itertools
from datetime import datetime, timedelta
def datetimes_to_ranges(iterable):
    """Yield runs of consecutive days found in ``iterable``.

    The input is de-duplicated and sorted first. Every run of two or more
    values spaced exactly one day apart is yielded as ``(first, last)``;
    every isolated value is yielded on its own.
    """
    ordered = sorted(set(iterable))

    def anchor(indexed):
        # Subtracting the position (in days) maps all members of one
        # consecutive run to the same value, which groupby uses as the key.
        position, moment = indexed
        return moment - timedelta(days=position)

    for _key, members in itertools.groupby(enumerate(ordered), anchor):
        run = [moment for _position, moment in members]
        if len(run) == 1:
            yield run[0]
        else:
            yield run[0], run[-1]