我将以下数据(一个小示例)导入到脚本中。
LastName StartTime EndTime Duration Period TeamAbbrev Position
Bouwmeester 0:00 0:37 0:37 1 STL D
Schwartz 0:00 0:40 0:40 1 STL W
Foligno 0:00 0:40 0:40 1 MIN W
Pietrangelo 0:00 0:48 0:48 1 STL D
Suter 0:00 0:40 0:40 1 MIN D
Staal 0:00 0:40 0:40 1 MIN C
Niederreiter 0:00 0:40 0:40 1 MIN W
Allen 0:00 20:00 20:00 1 STL G
Steen 0:00 0:30 0:30 1 STL W
Tarasenko 0:30 1:27 0:57 1 STL W
Parayko 0:37 1:43 1:06 1 STL D
这是脚本
import csv
from itertools import combinations, product
# Header = LastName StartTime EndTime Duration Period TeamAbbrev Position
# Import game: read every record after the header and keep each one as a
# single space-separated string, which is the form the parsing loop expects.
with open('2017020397.csv', newline='') as f:
    # Skip the header line; the default keeps an empty file from raising
    # StopIteration.
    next(f, None)
    skaters = '\n'.join(' '.join(row) for row in csv.reader(f))
data = skaters.splitlines()
def to_secs(ms: str) -> int:
    """Convert a ``mm:ss`` string to seconds.

    Args:
        ms: Clock time written as minutes and seconds separated by a
            single colon, e.g. ``"1:27"``.

    Returns:
        ``60 * minutes + seconds`` as an ``int``.

    Raises:
        ValueError: If ``ms`` is not exactly two colon-separated integers.
    """
    m, s = map(int, ms.split(':'))
    return 60 * m + s
# Store a list of (start, end) times, in seconds, for each player.
# NOTE: players are keyed by last name only, and the Period and TeamAbbrev
# columns are not read here, so intervals from every period and team end up
# in the same per-name list.
players = {}
for row in data:
    # A blank record cannot be unpacked into three fields; skip it.
    if not row.strip():
        continue
    # Only the first three whitespace-separated fields are needed:
    # LastName, StartTime and EndTime.
    name, start, end = row.split(None, 3)[:3]
    times = to_secs(start), to_secs(end)
    players.setdefault(name, []).append(times)

# Show the intervals collected for each player, then a separating blank line.
for t in players.items():
    print(t)
print()
# Determine the amount of overlapping time for each combination of three
# players (names are taken in sorted order, so each trio appears once).
for p1, p2, p3 in combinations(sorted(players), 3):
    total = 0
    # Check every way of picking one (start, end) interval from each player
    for t1, t2, t3 in product(players[p1], players[p2], players[p3]):
        # The three intervals overlap from the latest start to the earliest
        # end; a negative difference means they share no time at all.
        start, end = zip(t1, t2, t3)
        total += max(0, min(end) - max(start))

    # Report only trios that shared some time
    if total:
        print(p1, p2, p3, total)
输出:
Allen Niederreiter Pietrangelo 5481
Allen Niederreiter Prosser 2088
Allen Niederreiter Reilly 1464
目的是了解队友之间同时上场的情况。从上面的输出可以看到,来自 STL 的 Allen 与来自 MIN 的 Niederreiter 被配在了一起。我只想在同一支球队内部进行组合。怎样才能让 combinations 识别这一点?
另一个条件是,TeamAbbrev 会随当晚参赛的球队不同而逐场比赛变化。欢迎任何建议,谢谢!
编辑:如果球队标识用 int() 比用 str() 更容易处理,我可以抓取数字形式的球队标识来代替 TeamAbbrev。
答案 0 :(得分:0)
在 `for row in data:` 这一行之后(循环体内)添加:
# NOTE(review): per the header (LastName StartTime EndTime Duration Period
# TeamAbbrev Position) index 4 is the Period column and TeamAbbrev is
# index 5 -- confirm which column is intended here.
teams = row.split()[4]
# if the number of occurrences of the first item (which is a team)... is equal to the length of the list of teams, then, all the players are from the same team.
# NOTE(review): `teams` is a single string taken from one row, not a list of
# team names, so this compares character counts within that string -- verify
# that this is the intended check.
if teams.count(teams[0]) == len(teams):
#same lines, but one indentation block due to the `if` condition.
答案 1 :(得分:0)
你的问题不容易回答,但我会尝试。我做了一些假设:
现在有两个文件:
**2017020397.csv**
LastName,StartTime,EndTime,Duration,Period,TeamAbbrev,Position
Bouwmeester,0:00,0:37,0:37,1,STL,D
Schwartz,0:00,0:40,0:40,1,STL,W
Foligno,0:00,0:40,0:40,1,MIN,W
Pietrangelo,0:00,0:48,0:48,1,STL,D
Suter,0:00,0:40,0:40,1,MIN,D
Staal,0:00,0:40,0:40,1,MIN,C
Niederreiter,0:00,0:40,0:40,1,MIN,W
Allen,0:00,20:00,20:00,1,STL,G
Steen,0:00,0:30,0:30,1,STL,W
Tarasenko,0:30,1:27,0:57,1,STL,W
Parayko,0:37,1:43,1:06,1,STL,D
**solution.py**
import csv
import re
import itertools
# One or two digits of minutes, a colon, one or two digits of seconds.
pattern_time = r"(\d{1,2}):(\d{1,2})"
time_tester = re.compile(pattern_time)


def convert_to_seconds(time_string):
    """Convert a ``mm:ss`` string to seconds.

    The pattern is applied with ``match``, so it is anchored at the start
    of ``time_string`` only; anything after the matched part is ignored.

    Args:
        time_string: Text such as ``"0:37"`` or ``"20:00"``.

    Returns:
        ``60 * minutes + seconds`` as an ``int``, or ``0`` when
        ``time_string`` does not start with a ``mm:ss`` time.
    """
    pattern_found = time_tester.match(time_string)
    if not pattern_found:
        # We have a problem: the text is not a clock time. Callers get 0,
        # which is indistinguishable from a genuine "0:00".
        return 0
    minutes, seconds = map(int, pattern_found.group(1, 2))
    return 60 * minutes + seconds
file_name = '2017020397.csv'
# Nested mapping filled in below:
#   teams[TeamAbbrev][Period][LastName] -> list of
#   {'StartTime': seconds, 'EndTime': seconds} dicts, one per CSV row.
teams = {}
number_of_players_to_compare = 3

with open(file_name, newline='') as source_file:
    # DictReader consumes the header row and exposes each field by name.
    csv_file = csv.DictReader(source_file)
    for row in csv_file:
        # setdefault creates each nesting level the first time it is seen.
        current_team = teams.setdefault(row['TeamAbbrev'], {})
        current_team_period = current_team.setdefault(row['Period'], {})
        current_skater = current_team_period.setdefault(row['LastName'], [])
        times_recorded = {'StartTime': convert_to_seconds(row['StartTime']),
                          'EndTime': convert_to_seconds(row['EndTime'])}
        current_skater.append(times_recorded)
# For every team and period, print the total overlapping time of each
# combination of `number_of_players_to_compare` players from that team.
for (current_team_to_show, current_periods) in teams.items():
    current_periods_sorted = sorted(current_periods)
    for current_period_name in current_periods_sorted:
        print("\nFor team", current_team_to_show, "in period", current_period_name, ":")
        current_period = current_periods[current_period_name]
        current_players = sorted(current_period)
        for current_player_combination in itertools.combinations(current_players, number_of_players_to_compare):
            total = 0
            # Try every way of picking one recorded interval per player.
            for times_this_combination in itertools.product(*(current_period[x] for x in current_player_combination)):
                start_times = (x['StartTime'] for x in times_this_combination)
                end_times = (x['EndTime'] for x in times_this_combination)
                # Common time runs from the latest start to the earliest
                # end; clamp at 0 when the intervals do not all overlap.
                total += max(0, min(end_times) - max(start_times))
            # Printed for every combination, including those with a total of 0.
            print(" ".join(current_player_combination), total)
以下是关于我的做法的一些说明:
我使用了 csv.DictReader,因此不必手动跳过第一行(表头),并且可以通过列名获取每一行的各个字段。如果您有任何疑问,请随时提出。