我有员工编号,以及他们每天的上班打卡(Clockin)和下班打卡(Clockout)时间。我想按小时统计在岗的员工人数。用 Excel 公式解决也可以。
我的样本数据如下:
Employee ID Day Clockin Clockout
1 Mon 7:00 15:00
1 Fri 7:00 15:00
2 Wed 8:00 22:00
2 Mon 10:00 18:00
2 Fri 9:00 20:00
3 Mon 7:00 8:00
我的输出应如下所示:
Timestamp Mon Tue Wed Thu Fri
7:00 2 0 0 0 1
8:00 1 0 1 0 1
9:00 1 0 1 0 2
10:00 2 0 1 0 2
11:00 2 0 1 0 2
我尝试对 Clockin 和 Clockout 两列使用 mutate,但没有成功。我希望得到 R 语言的解决方案。我也试过 “Calculate the days per month between checkin and checkout dates in R” 这个问题中提到的解决方案,但它似乎不适用于我的情况。
输入示例
ID Day Clockin Clockout
Employee 1 Mon 7:00 15:00
Employee 2 Mon 8:00 15:00
Employee 3 Mon 12:00 14:00
Employee 4 Mon 13:00 20:00
Employee 5 Mon 15:00 22:00
Employee 6 Mon 23:00 23:30
输出示例
Mon Count
7:00 1
8:00 2
9:00 2
10:00 2
11:00 2
12:00 3
13:00 4
14:00 3
15:00 2
16:00 2
17:00 2
18:00 2
19:00 2
20:00 1
21:00 1
22:00 0
23:00 1
请注意计数的变化:每当有人打卡进入或离开时,对应小时的人数都会随之改变。
答案 0 :(得分:2)
这比看起来要复杂一些:如果员工上夜班并工作到第二天,那么午夜之后的小时应当计入第二天,而不是打卡当天。
为此,我编写了自己的函数。该函数接收星期几、上班打卡时间和下班打卡时间,并返回一个时间序列(间隔为 1 小时)以及序列中每个时间点所属的星期几。见下文:
#' Expand one shift into evenly spaced time stamps labelled with their weekday.
#'
#' @param day.i Weekday on which the shift starts, as a three-letter
#'   abbreviation ("MON", "Tue", ...); matching is case-insensitive and a
#'   length-one factor is accepted.
#' @param start.i,end.i Clock-in and clock-out times written as "H:MM"
#'   (character or factor). They are parsed with format "%H:%M", so both are
#'   placed on the current date in the local time zone.
#' @param step.i Spacing of the returned time stamps, passed to `seq()` as
#'   `by`.
#' @param include.end If TRUE (the default, and the historical behaviour) a
#'   time stamp that coincides with `end.i` is kept; if FALSE only time stamps
#'   strictly before `end.i` are returned.
#' @return A list with two parallel vectors: `HOUR` (POSIXct time stamps) and
#'   `DAY` (title-case weekday abbreviation for each time stamp; time stamps
#'   after midnight are labelled with the following weekday).
time.seq <- function(day.i, start.i, end.i, step.i = '1 hour',
                     include.end = TRUE){
  week.days <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")

  day.idx <- match(toupper(as.character(day.i)), toupper(week.days))
  if (length(day.idx) != 1L || is.na(day.idx)) {
    stop("'day.i' must be a single weekday abbreviation such as \"MON\"",
         call. = FALSE)
  }

  if (is.factor(start.i)) start.i <- as.character(start.i)
  if (is.factor(end.i)) end.i <- as.character(end.i)
  start.i <- as.POSIXct(start.i, format = "%H:%M")
  end.i <- as.POSIXct(end.i, format = "%H:%M")
  if (length(start.i) != 1L || length(end.i) != 1L ||
      is.na(start.i) || is.na(end.i)) {
    stop("'start.i' and 'end.i' must each be a single \"H:MM\" time",
         call. = FALSE)
  }

  if (start.i > end.i) { ## accounting for working on the next day (after midnight)
    ## stepping by "day" keeps the wall-clock time of end.i
    end.i <- seq(end.i, by = "day", length.out = 2L)[2L]
  }

  out.h <- seq(start.i, end.i, by = step.i)
  if (!include.end) {
    out.h <- out.h[out.h < end.i]
  }

  ## Whole calendar days elapsed since the first time stamp. Subtracting Date
  ## values always yields days, whereas diff() on POSIXct chooses its units
  ## automatically and reports a lone midnight crossing as "1 day", not 86400 s.
  day.offset <- as.integer(as.Date(format(out.h, "%Y-%m-%d")) -
                             as.Date(format(out.h[1L], "%Y-%m-%d")))
  ## wrap around the week so that Sunday is followed by Monday
  out.d <- week.days[(day.idx - 1L + day.offset) %% 7L + 1L]

  list(DAY = out.d, HOUR = out.h)
}
然后使用该函数,并且与其他答案中使用的逻辑类似,我们可以获得每小时的计数。
library(dplyr)
library(tidyr)
# For every shift (one row of df) build the list of time stamps and the list
# of weekday labels returned by time.seq(), flatten both lists in parallel,
# tally the rows per weekday and "HH:MM" label, and finally spread the
# weekdays into columns. Combinations that never occur are left as NA.
df %>%
  rowwise() %>%
  mutate(
    DAY = list(time.seq(Day, Clockin, Clockout)$DAY),
    HOUR = list(time.seq(Day, Clockin, Clockout)$HOUR)
  ) %>%
  unnest(cols = c(DAY, HOUR)) %>%
  mutate(Day = DAY, Hour = format(HOUR, '%H:%M')) %>%
  count(Day, Hour, name = "Count") %>%
  pivot_wider(names_from = "Day", values_from = "Count")
#> # A tibble: 22 x 8
#> Hour Fri Mon Sat Sun Thu Tue Wed
#> <chr> <int> <int> <int> <int> <int> <int> <int>
#> 1 07:00 2 NA 1 NA 1 1 1
#> 2 08:00 2 NA 1 NA 1 1 1
#> 3 09:00 2 NA 1 NA 1 1 1
#> 4 10:00 4 NA 1 NA 3 3 3
#> 5 11:00 4 NA 1 NA 3 3 3
#> 6 12:00 5 1 1 NA 4 4 4
#> 7 13:00 5 1 1 NA 4 4 4
#> 8 14:00 5 1 1 NA 4 4 4
#> 9 15:00 5 1 1 NA 4 4 4
#> 10 16:00 3 1 NA NA 3 3 3
#> # ... with 12 more rows
# Sample shift data used by the pipeline above (it must be created before that
# pipeline is run). This is a dput()-style dump of a 24-row data.frame with
# columns Employee.ID (integer) and Day, Clockin, Clockout (factors).
# Day holds upper-case weekday abbreviations; Clockin/Clockout hold "H:MM"
# strings. Each factor also carries an empty-string level that no row uses.
# Employee 715 clocks in at 20:00 and out between 4:00 and 4:45, i.e. those
# shifts have a clock-out time earlier in the day than the clock-in time.
df <- structure(list(Employee.ID = c(462L, 462L, 559L, 559L, 559L,
559L, 560L, 560L, 560L, 560L, 560L, 715L, 715L, 715L, 715L, 715L,
791L, 791L, 791L, 791L, 802L, 802L, 802L, 802L), Day = structure(c(2L,
4L, 7L, 8L, 6L, 2L, 3L, 7L, 8L, 6L, 2L, 3L, 7L, 2L, 4L, 5L, 7L,
8L, 6L, 2L, 7L, 8L, 6L, 2L), .Label = c("", "FRI", "MON", "SAT",
"SUN", "THU", "TUE", "WED"), class = "factor"), Clockin = structure(c(5L,
5L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 2L, 2L, 2L, 2L), .Label = c("", "10:00", "12:00",
"20:00", "7:00"), class = "factor"), Clockout = structure(c(2L,
2L, 4L, 4L, 5L, 4L, 7L, 8L, 7L, 7L, 6L, 10L, 9L, 11L, 9L, 9L,
2L, 2L, 2L, 2L, 4L, 7L, 3L, 4L), .Label = c("", "15:00", "17:30",
"18:00", "18:15", "19:45", "20:00", "22:00", "4:00", "4:15",
"4:45"), class = "factor")), row.names = c(NA, 24L), class = "data.frame")
答案 1 :(得分:1)
如果只按 “Clockin” 所在的小时来统计,在 R 中可以先用 `count` 统计每个 “Day” 与 “Clockin” 组合出现的次数,再用 `pivot_wider` 把结果转换成宽格式(下面第一段代码,数据为 df1)。
对于问题中更新后的示例数据,请参见下面第二段代码(数据为 df2)。
library(dplyr)
library(tidyr)
library(lubridate)
# Count how many shifts start at each clock-in time stamp on each weekday.
# Day is turned into a factor with an explicit MON..SAT level order so that
# complete() can add the weekdays that never occur; complete() also adds every
# hourly step between the earliest and the latest clock-in, with n = 0.
# The time stamps are then shown as "HH:MM" and the weekdays become columns.
df1 %>%
  mutate(
    Day = factor(Day, levels = c("MON", "TUE", "WED", "THU", "FRI", "SAT")),
    Clockin = ymd_hms(Clockin)
  ) %>%
  select(Day, Clockin) %>%
  count(Day, Clockin) %>%
  complete(
    Day,
    Clockin = seq(min(Clockin), max(Clockin), by = "1 hour"),
    fill = list(n = 0)
  ) %>%
  mutate(Clockin = format(Clockin, "%H:%M")) %>%
  pivot_wider(names_from = "Day", values_from = "n")
# A tibble: 4 x 7
# Clockin MON TUE WED THU FRI SAT
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 07:00 0 0 0 0 1 1
#2 08:00 0 0 0 0 0 0
#3 09:00 0 0 0 0 0 0
#4 10:00 0 1 1 1 1 0
# Hourly head count per day for df2.
# Each shift is expanded into time stamps one hour apart, starting at Clockin.
# Only time stamps strictly before Clockout are kept, so an employee who
# leaves at 15:00 is no longer counted at 15:00, while a 23:00-23:30 shift
# still counts for 23:00. Hours in which nobody is present produce no row.
# map2() is called through the purrr namespace because purrr is not attached
# by the library() calls above.
# NOTE(review): shifts that end after midnight (Clockout earlier than Clockin)
# are not supported here; seq() stops with an error for them.
df2 %>%
  transmute(Day, Hour = purrr::map2(
    as.POSIXct(Clockin, format = '%H:%M'),
    as.POSIXct(Clockout, format = '%H:%M'),
    function(clock_in, clock_out) {
      stamps <- seq(clock_in, clock_out, by = '1 hour')
      stamps[stamps < clock_out]
    })) %>%
  unnest(c(Hour)) %>%
  count(Day, Hour = format(Hour, '%H:%M'))
# Result, assuming df2 holds the six Monday shifts of the input example:
# A tibble: 16 x 3
# Day Hour n
# <chr> <chr> <int>
# 1 Mon 07:00 1
# 2 Mon 08:00 2
# 3 Mon 09:00 2
# 4 Mon 10:00 2
# 5 Mon 11:00 2
# 6 Mon 12:00 3
# 7 Mon 13:00 4
# 8 Mon 14:00 3
# 9 Mon 15:00 2
#10 Mon 16:00 2
#11 Mon 17:00 2
#12 Mon 18:00 2
#13 Mon 19:00 2
#14 Mon 20:00 1
#15 Mon 21:00 1
#16 Mon 23:00 1
答案 2 :(得分:1)
我想在这里分享我的解决方案,以防它对某人有所帮助。该解决方案唯一的区别是,我额外使用了 Position 这一列作为过滤条件。如果您的问题与上述问题相同,可以去掉这个过滤条件;否则,它可以帮助您按 position id 分别整理各小时(hours)的人数。
#loading libraries
library(lubridate)
library(readxl)
library(stringr)
library(tidyr)
# Load the workbook. Seven column types are declared; the sixth is 'skip',
# so that sheet column is not imported.
df <- read_excel(
  'data_sample.xlsx',
  col_types = c('numeric', 'text', 'date', 'guess', 'guess', 'skip', 'numeric')
)
# Replace both clock columns by decimal hours (hour + minute / 60) so that
# they can be compared against plain hour numbers.
df[["Clock In"]] <- hour(df[["Clock In"]]) + minute(df[["Clock In"]]) / 60
df[["Clock Out"]] <- hour(df[["Clock Out"]]) + minute(df[["Clock Out"]]) / 60
# Keep only the records whose clock-in and clock-out differ.
df <- df[df[["Clock In"]] != df[["Clock Out"]], ]
# Column labels "00:00", "01:00", ..., "23:00", one per hour of the day.
hours <- sprintf("%02d:00", 0:23)
# Presence matrix: one row per record of df, one column per hour 0..23.
# A cell is 1 when `Clock In` <= hour < `Clock Out` and 0 otherwise.
# outer() evaluates the comparison for every record/hour pair at once, so the
# matrix is not grown row by row and an empty df simply yields a 0 x 24 matrix.
hours_mat <- 1 * (outer(df[["Clock In"]], 0:23, "<=") &
                    outer(df[["Clock Out"]], 0:23, ">"))
# Turn the presence matrix into a data frame labelled with the hour strings.
# unlist() accepts `hours` both as a list and as a character vector.
hours_df <- as.data.frame(hours_mat)
colnames(hours_df) <- unlist(hours)
# Put the hour columns next to the original records.
final_df <- cbind(df, hours_df)
# Sum the presence flags per Date and Position. The hour columns are selected
# by name rather than by position, so the result does not depend on how many
# columns df has in front of them.
result <- aggregate(final_df[colnames(hours_df)],
                    by = list(Date = final_df$Date,
                              Position = final_df$Position),
                    FUN = sum)
# Make Date a factor before reshaping from wide to long.
result[["Date"]] <- factor(result[["Date"]])
# Wide to long: stack the hour columns '00:00' .. '23:00' into a Timestamp
# key (kept as a factor in column order) and a Count value.
long_result <- gather(result, key = Timestamp, value = Count,
                      '00:00':'23:00', factor_key = TRUE)
# Long to wide: one column per Date, filled with Count.
result_wide <- spread(long_result, key = Date, value = Count)
# To keep a single position only, uncomment the next line and fill in the
# position number:
# result_wide <- result_wide[result_wide$Position == <position number>, ]
# Write the final table to "output.csv".
write.csv(x = result_wide, file = 'output.csv')
示例数据如下:
# Sample data as a dput()-style dump: an 18-row tibble with columns
# `Employee Number`, Day, Date, ClockIn, `Clock Out`, Department and Position.
# Date, ClockIn and `Clock Out` are POSIXct values in UTC. The clock values
# are large negative epoch seconds (they fall on 1899-12-31), so presumably
# only their time of day is meaningful — confirm against the workbook.
# NOTE(review): the clock-in column is named ClockIn here (no space), whereas
# the script above reads `Clock In`; it also reads a file in which the sixth
# column is skipped, while this dump still contains Department — confirm that
# the dump and the script refer to the same layout.
# The expression is not assigned to a name; assign it before use.
structure(list(`Employee Number` = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Day = c("THU", "FRI", "SAT", "SUN",
"WED", "THU", "FRI", "SAT", "SUN", "WED", "THU", "THU", "FRI",
"SAT", "SUN", "WED", "THU", "THU"), Date = structure(c(1577923200,
1578009600, 1578096000, 1578182400, 1578441600, 1578528000, 1578614400,
1578700800, 1578787200, 1579046400, 1579132800, 1579132800, 1579219200,
1579305600, 1579392000, 1579651200, 1579737600, 1579737600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), ClockIn = structure(c(-2209021200,
-2209021200, -2209021200, -2209021200, -2209021200, -2209023000,
-2209021200, -2209021200, -2209021200, -2209021200, -2209075200,
-2209021200, -2209021200, -2209021200, -2209021200, -2209021200,
-2209075200, -2209021200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`Clock Out` = structure(c(-2208992400, -2208992400, -2208992400,
-2208992400, -2208992400, -2208994200, -2208992400, -2208992400,
-2208992400, -2208992400, -2209075200, -2208999600, -2208992400,
-2208992400, -2208992400, -2208992400, -2209075200, -2208999600
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Department = c(20,
20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
20, 20), Position = c(35, 35, 35, 35, 35, 35, 35, 35, 35,
35, 35, 35, 35, 35, 35, 35, 35, 35)), row.names = c(NA, -18L
), class = c("tbl_df", "tbl", "data.frame"))