在 R 中为员工生成直方图

时间:2016-11-17 16:59:56

标签: r

以下是我的数据集,其中包含员工出勤记录

         date   intime  outtime
2  02/11/2015 10:21:27 17:58:12
3  03/11/2015 10:13:09 18:52:44
4  04/11/2015 10:11:52 18:40:36
5  05/11/2015 10:31:42 18:16:57
6  06/11/2015 10:13:13 18:36:15
10 10/11/2015 10:03:20 18:07:52
11 11/11/2015 09:40:20 18:42:20
12 12/11/2015 10:38:56 18:37:20
13 13/11/2015 10:45:26 18:09:54
16 16/11/2015 10:13:13 18:36:15
17 17/11/2015 10:11:43 18:36:15
18 18/11/2015 10:13:13 18:36:15
19 19/11/2015 10:13:13 18:36:15
20 20/11/2015 12:14:25 20:25:08
23 23/11/2015 10:08:08 17:57:35
24 24/11/2015 14:30:32 18:36:15

员工服务的总时长(以小时为单位)按如下方式计算:

# Sum, over all rows of `newdata`, the span between the arrival time (raised
# to at least "08:00:00" by pmax) and the departure time (capped at
# "18:00:00" by pmin).
# NOTE(review): the column types of `newdata` are not shown here. This
# presumably relies on intime/outtime being a time class whose subtraction
# yields fractions of a day (e.g. chron::times) -- TODO confirm. Plain
# character columns cannot be subtracted.
total_time <- with(newdata, sum(pmin(newdata$outtime, "18:00:00") - 
                                pmax(newdata$intime, "08:00:00")   ))
# NOTE(review): floor() is applied before the multiplication by 24, so the
# result is always a multiple of 24 -- verify that this truncation is intended.
total_time <- 24*floor(as.numeric(total_time))
"Total time served by employee is : 96 hours"

我想为每位员工生成直方图,显示每月服务的小时数,共有5个分箱。

1 个答案:

答案 0 :(得分:0)

我修改了数据,使其包含更多月份的信息(以便得到更好的直方图):

library(data.table)
library(lubridate)  # dmy() and day() for date handling
library(ggplot2)

# Attendance log: one row per working day with clock-in and clock-out times.
# `text =` makes it explicit that the string is the data itself rather than a
# file name, URL or shell command.
df <- fread(text = "    date   intime  outtime
           02/11/2015 10:21:27 17:58:12
           03/11/2015 10:13:09 18:52:44
           04/11/2015 10:11:52 18:40:36
           05/11/2015 10:31:42 18:16:57
           06/11/2015 10:13:13 18:36:15
           10/11/2015 10:03:20 18:07:52
           11/11/2015 09:40:20 18:42:20
           12/11/2015 10:38:56 18:37:20
           13/11/2015 10:45:26 18:09:54
           16/11/2015 10:13:13 18:36:15
           17/11/2015 10:11:43 18:36:15
           18/11/2015 10:13:13 18:36:15
           19/11/2015 10:13:13 18:36:15
           20/11/2015 12:14:25 20:25:08
           23/11/2015 10:08:08 17:57:35
           24/11/2015 14:30:32 18:36:15")

# Combine a "dd/mm/yyyy" date with an "HH:MM:SS" clock time into a POSIXct.
# Parsing a bare "%H:%M:%S" string would silently attach the date on which the
# script is run (in the local time zone), so each time is anchored to its own
# row's date in UTC instead.
parse_stamp <- function(date, clock) {
  as.POSIXct(paste(date, clock), format = "%d/%m/%Y %H:%M:%S", tz = "UTC")
}

df$intime <- parse_stamp(df$date, df$intime)
df$outtime <- parse_stamp(df$date, df$outtime)

# Day of month, used as the x axis of the plot.
df$day <- day(dmy(df$date))

# Only the time between 08:00 and 18:00 counts as served time: late departures
# are capped at 18:00 and early arrivals are raised to 08:00.
shift_start <- parse_stamp(df$date, "08:00:00")
shift_end <- parse_stamp(df$date, "18:00:00")
df$total_time <- as.numeric(
  difftime(pmin(df$outtime, shift_end), pmax(df$intime, shift_start),
           units = "hours")
)

# The bar heights are already computed, so draw them as-is with geom_col().
# A `bins` argument has no effect on bars drawn from precomputed values, so
# there is one bar per day.
ggplot(df, aes(x = day, y = total_time)) +
  geom_col()

(输出图像)

只使用 5 个分箱(由于第 24–30 天的区间内没有数据,图中只显示 4 个分箱):

# Group the day of month into at most five bins. max(df$day) is added as an
# extra cut point; the cut points are de-duplicated and sorted because cut()
# stops with "'breaks' are not unique" when the maximum coincides with one of
# the fixed cut points (e.g. a record on day 30).
day_breaks <- sort(unique(c(0, 5, 10, 15, 30, max(df$day))))
df$breaks <- cut(df$day, breaks = day_breaks)

# Total hours per bin. `df` was created by fread(), so it is a data.table and
# can be aggregated directly without loading another package such as plyr.
# keyby keeps the bins in factor-level order; bins without rows are absent.
df1 <- df[, .(total_hr = sum(total_time)), keyby = breaks]

# The totals are already computed, so draw them as-is with geom_col();
# `bins` and `binwidth` have no effect on bars drawn from precomputed values.
ggplot(df1, aes(x = breaks, y = total_hr)) +
  geom_col()

(输出图像)