以下是我的数据集,其中包含员工出勤记录
date intime outtime
2 02/11/2015 10:21:27 17:58:12
3 03/11/2015 10:13:09 18:52:44
4 04/11/2015 10:11:52 18:40:36
5 05/11/2015 10:31:42 18:16:57
6 06/11/2015 10:13:13 18:36:15
10 10/11/2015 10:03:20 18:07:52
11 11/11/2015 09:40:20 18:42:20
12 12/11/2015 10:38:56 18:37:20
13 13/11/2015 10:45:26 18:09:54
16 16/11/2015 10:13:13 18:36:15
17 17/11/2015 10:11:43 18:36:15
18 18/11/2015 10:13:13 18:36:15
19 19/11/2015 10:13:13 18:36:15
20 20/11/2015 12:14:25 20:25:08
23 23/11/2015 10:08:08 17:57:35
24 24/11/2015 14:30:32 18:36:15
我用下面的代码计算员工的总工作时长(以小时为单位):
# Sum, over all rows of `newdata`, the span between arrival and departure,
# with departure capped at "18:00:00" (pmin) and arrival floored at
# "08:00:00" (pmax).
# NOTE(review): the column types of `newdata` are not shown here; the
# subtraction only works if intime/outtime are time-like objects rather than
# plain character strings -- confirm.
total_time <- with(newdata, sum(pmin(newdata$outtime, "18:00:00") -
pmax(newdata$intime, "08:00:00") ))
# Truncate the numeric total to a whole number, then multiply by 24.
# NOTE(review): presumably this converts days to hours -- confirm the unit
# of the sum above.
total_time <- 24*floor(as.numeric(total_time))
"Total time served by employee is : 96 hours"
我想为每位员工生成一个直方图,显示其每月的工作小时数,并分成 5 个分箱(bin)。
答案 0 :(得分:0)
我对数据做了一些修改,以便当月有更多的记录(这样直方图的效果更好):
library(data.table)
# Sample attendance log: one row per working day with the calendar date
# (dd/mm/yyyy) and the clock-in / clock-out times (HH:MM:SS).
df <- fread(" date intime outtime
02/11/2015 10:21:27 17:58:12
03/11/2015 10:13:09 18:52:44
04/11/2015 10:11:52 18:40:36
05/11/2015 10:31:42 18:16:57
06/11/2015 10:13:13 18:36:15
10/11/2015 10:03:20 18:07:52
11/11/2015 09:40:20 18:42:20
12/11/2015 10:38:56 18:37:20
13/11/2015 10:45:26 18:09:54
16/11/2015 10:13:13 18:36:15
17/11/2015 10:11:43 18:36:15
18/11/2015 10:13:13 18:36:15
19/11/2015 10:13:13 18:36:15
20/11/2015 12:14:25 20:25:08
23/11/2015 10:08:08 17:57:35
24/11/2015 14:30:32 18:36:15")
# Parse clock times against a fixed reference date in UTC. With only
# "%H:%M:%S" in the format, as.POSIXct() fills in today's date in the local
# time zone, so the result changes from day to day and a clock time that
# falls into a daylight-saving gap can come out wrong or NA.
ref_date <- "1970-01-01"
parse_clock <- function(x) {
  as.POSIXct(paste(ref_date, x), format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
}
df$intime <- parse_clock(df$intime)
df$outtime <- parse_clock(df$outtime)
library(lubridate) #to extract the day
# Day of the month taken from the dd/mm/yyyy date column.
df$day <- day(dmy(df$date))
# Hours served per day, counting only the part of the shift that lies inside
# the 08:00-18:00 window.
shift_start <- parse_clock("08:00:00")
shift_end <- parse_clock("18:00:00")
served <- difftime(
  pmin(df$outtime, shift_end),
  pmax(df$intime, shift_start),
  units = "hours"
)
# A shift entirely outside the window would give a negative span; count it
# as zero hours instead.
df$total_time <- pmax(as.numeric(served), 0)
library(ggplot2)
# One bar per day of the month, bar height = hours served that day.
# geom_col() draws the supplied y values directly; geom_histogram() with
# stat = "identity" does no binning, so its `bins` argument had no effect.
ggplot(df, aes(x = day, y = total_time)) +
  geom_col()
只使用 5 个分箱(由于 24–30 日这一区间内没有数据,图中只会显示 4 个柱):
# Group the day of the month into intervals. The break points are sorted and
# de-duplicated so that cut() does not stop with "'breaks' are not unique"
# when the latest day in the data coincides with one of the fixed breaks.
day_breaks <- sort(unique(c(0, 5, 10, 15, 30, max(df$day))))
df$breaks <- cut(df$day, breaks = day_breaks)
# Total hours per interval. Base aggregate() is used because the original
# ddply() call needs the plyr package, which is never loaded here. Intervals
# without any rows are dropped from the result.
df1 <- aggregate(
  list(total_hr = df$total_time),
  by = list(breaks = df$breaks),
  FUN = sum
)
# The data is already summarised, so draw the totals directly as bars.
ggplot(df1, aes(x = breaks, y = total_hr)) +
  geom_col()