我有一个包含睡眠时间信息的 .csv 数据集,希望用 Python 为它生成可视化图表。
数据集看起来像这样:
SleepStartDate,SleepStartTime,SleepStopTime
17/03/2017,23:45,07:25
19/03/2017,01:05,09:10
19/03/2017,23:50,08:25
我想要生成的可视化应该类似于以下内容: 图片来源:http://quantifiedself.com/wp-content/uploads/2015/08/qs2.png
我知道这是一个非常简单的可视化,想必已经有现成的库内置了这种图表,但我尽力用 Google 搜索也没能找到。如果有人能给我指个方向,我将不胜感激。
提前感谢您的时间和智慧。
答案 0 :(得分:1)
经典的Python选择是使用matplotlib包。查看示例图表,它看起来像一个垂直条形图。
答案 1 :(得分:0)
我没能找到具备所需功能的库,所以最后自己写了一个脚本来实现:
<strong>脚本:</strong>
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import matplotlib.patches as patches
import datetime as dt
import csv
import sys
# Number of minutes in one day (24 * 60)
MINUTES_IN_DAY = 1440.0
# Matplotlib colour code for the sleep bars; handed to plotData by the script below
COLUMN_COLOUR = 'b'
# Graph data using matplotlib visualization
def plotData(data,columnColour,maxDate,minDate):
    """Draw every sleep period as a translucent vertical band, one column per day.

    Args:
        data: sequence of objects exposing ``year``, ``month``, ``day`` and the
            ``startTime`` / ``stopTime`` attributes (fractions of a day).
        columnColour: matplotlib colour used as the face colour of the bands.
        maxDate: datetime passed to ``drange`` as the end of the x range.
        minDate: datetime passed to ``drange`` as the start of the x range.

    Shows the figure with ``plt.show()``; nothing is returned.
    """
    one_day = dt.timedelta(days=1)
    half_day = dt.timedelta(days=0.5)

    # An invisible scatter series with one point per day gives the axes their size.
    day_numbers = mpl.dates.drange(minDate, maxDate, one_day)
    # Lift the first point towards the top of the graph to get the right height.
    day_numbers[0] += 0.85
    # Keep only the time-of-day part (modulo 1) and add an arbitrary whole-day
    # base; int() is used so that the y-axis starts at midnight.
    time_of_day = day_numbers % 1 + int(day_numbers[0])

    fig = plt.figure()
    fig.suptitle('Daily Sleep Patterns', fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)
    # The background series exists only for sizing, so it is never drawn.
    ax.plot_date(day_numbers, time_of_day, 'ro', color='w', visible=False)
    ax.yaxis_date()
    fig.autofmt_xdate()

    # One tick per hour on the y-axis, labelled with the hour names.
    y_low, y_high = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(y_low, y_high, 1.0 / 24.0))
    ax.set_yticklabels([
        'Midnight', '1am', '2am', '3am', '4am', '5am', '6am', '7am', '8am',
        '9am', '10am', '11am', 'Midday', '1pm', '2pm', '3pm', '4pm', '5pm',
        '6pm', '7pm', '8pm', '9pm', '10pm', '11pm', 'Midnight',
    ])

    def shade(left, right, bottom, top):
        # ymin/ymax of axvspan are fractions of the axes height, which is why
        # the day fractions stored on each item can be passed straight through.
        plt.axvspan(xmin=left, xmax=right, ymin=bottom, ymax=top, facecolor=columnColour, alpha=0.5)

    for sleep in data:
        # Each day's column is centred on its date: half a day either side.
        column_start = dt.datetime(sleep.year, sleep.month, sleep.day) - half_day
        column_end = column_start + one_day
        if sleep.startTime > sleep.stopTime:
            # The period starts and finishes on different days: split it and
            # add one piece to each day's column.
            shade(column_start, column_end, sleep.startTime, 1)
            shade(column_end, column_end + one_day, 0, sleep.stopTime)
        else:
            # The period lies within a single day.
            shade(column_start, column_end, sleep.startTime, sleep.stopTime)

    ax.set_ylabel('Hours',fontweight='bold')
    ax.grid(True)
    plt.show()
# Read data from csv file
def readDataFromFile(dataFile):
    """Read a CSV file and return its rows.

    Args:
        dataFile: path of the CSV file to read.

    Returns:
        A list with one entry per CSV row (header row included); each entry
        is the list of string fields of that row.

    Raises:
        OSError: if the file cannot be opened.
    """
    # newline='' lets the csv module do its own line-ending handling, so line
    # breaks embedded in quoted fields are kept intact. The with-block closes
    # the file even if parsing fails.
    with open(dataFile, 'rt', newline='') as f:
        return list(csv.reader(f))
# Class to store time and date data read from file
class sleepInstance(object):
    """One sleep period parsed from a CSV row of the form [date, start, stop].

    After construction ``day``, ``month`` and ``year`` hold the date as ints,
    and ``startTime`` / ``stopTime`` hold the clock times as fractions of a
    day (0.0 = midnight, 0.5 = midday).
    """

    # Layouts tried in order. A two-digit year and a four-digit year can never
    # both match the same string, and the same holds for times with and
    # without seconds, so the order does not change the result.
    _DATE_FORMATS = ("%d/%m/%y", "%d/%m/%Y")
    _TIME_FORMATS = ("%H:%M:%S", "%H:%M")
    _SECONDS_IN_DAY = 86400.0

    def __init__(self,listOfInputLists):
        """Parse one row: index 0 is the date, 1 the start time, 2 the stop time.

        Raises:
            ValueError: if a field matches none of the accepted layouts.
            IndexError: if the row has fewer than three fields.
        """
        self.day = 0
        self.month = 0
        self.year = 0
        self.formatDate(listOfInputLists[0])
        self.startTime = self.formatTime(listOfInputLists[1])
        self.stopTime = self.formatTime(listOfInputLists[2])

    @staticmethod
    def _parseWithFormats(text, formats):
        """Return ``text`` parsed with the first matching layout in ``formats``."""
        for fmt in formats:
            try:
                return dt.datetime.strptime(text, fmt)
            except ValueError:
                continue
        raise ValueError(
            "%r does not match any of the formats: %s" % (text, ", ".join(formats))
        )

    # Extracts date information variables
    def formatDate(self,unformattedDate):
        """Store day, month and year from a ``dd/mm/yy`` or ``dd/mm/yyyy`` string."""
        date = self._parseWithFormats(unformattedDate, self._DATE_FORMATS)
        self.day = date.day
        self.month = date.month
        self.year = date.year

    # Formats time as a decimal fraction of day, for use in graph
    def formatTime(self,unformattedTime):
        """Return an ``HH:MM:SS`` or ``HH:MM`` string as a fraction of a day."""
        parsed = self._parseWithFormats(unformattedTime, self._TIME_FORMATS)
        sinceMidnight = dt.timedelta(
            hours=parsed.hour, minutes=parsed.minute, seconds=parsed.second
        )
        return sinceMidnight.total_seconds() / self._SECONDS_IN_DAY
# Formats data read from file as a list of sleepInstance objects
def formatDataForPlot(listOfInputLists):
    """Convert raw CSV rows into a list of sleepInstance objects.

    Args:
        listOfInputLists: rows as returned by csv.reader; the first row is
            treated as the header and is not converted.

    Returns:
        A list with one sleepInstance per non-empty data row, in file order.
    """
    # csv.reader yields [] for a blank line (e.g. a trailing newline at the
    # end of the file); such rows carry no data and are skipped.
    return [sleepInstance(row) for row in listOfInputLists[1:] if row]
# Extracts earliest (min) and latest (max) dates from data, for use in setting graph limits
def getMaxAndMinDates(plotDataList):
    """Return the latest and the earliest date found in the data.

    Args:
        plotDataList: iterable of objects with ``year``, ``month`` and ``day``
            attributes.

    Returns:
        A ``(maxDate, minDate)`` tuple of datetime objects, latest date first.
    """
    allDates = [
        dt.datetime(entry.year, entry.month, entry.day) for entry in plotDataList
    ]
    latest = max(allDates)
    earliest = min(allDates)
    return latest, earliest
# Script entry: read the CSV, turn its rows into sleepInstance objects, and plot them.
# The input path is relative to the current working directory.
dataFile = 'sleepData.csv'
# Raw rows as lists of strings, header row included
listOfInputLists = readDataFromFile(dataFile)
# One sleepInstance per data row (the header row is dropped by formatDataForPlot)
plotDataList = formatDataForPlot(listOfInputLists)
# Note the order of the returned pair: latest date first, earliest second
maxDate, minDate = getMaxAndMinDates(plotDataList)
plotData(plotDataList,COLUMN_COLOUR,maxDate,minDate)
<strong>输入:</strong>
Date,Start,Finish
17/03/17,03:15:00,03:55:00
17/03/17,06:20:00,06:35:00
17/03/17,09:00:00,09:40:00
17/03/17,13:10:00,13:35:00
17/03/17,15:45:00,16:30:00
17/03/17,18:45:00,19:25:00
17/03/17,21:15:00,21:35:00
18/03/17,00:30:00,02:00:00
18/03/17,04:50:00,05:05:00
18/03/17,08:20:00,08:40:00
18/03/17,12:30:00,13:10:00
18/03/17,16:30:00,17:00:00
18/03/17,18:45:00,19:00:00
18/03/17,20:30:00,21:00:00
19/03/17,00:00:00,12:00:00
19/03/17,18:00:00,23:59:00
19/03/17,13:00:00,14:00:00
20/03/17,12:00:00,11:00:00
<strong>输出:</strong>
等我有时间时,可能会再把它美化一下:https://github.com/ambidextrous/timeLogGrapher