我有一个数据框(df),其中 df$time 列包含以下时间值:
df$id df$time
1 12:20
2 12:40
3 1:00
4 1:20
5 2:00
6 3:00
7 3:15
8 4:00
9 7:00
10 11:00
11 12:00
12 12:20
13 12:40
14 1:00
15 1:30
16 3:00
17 4:00
18 4:30
19 5:00
20 5:15
21 8:00
22 10:00
我想标明:第一段从 12:00 到 11:59 的时间值(id 1:10)是 am,第二段(id 11:22)是 pm。
希望得到类似下面的结果:
df$id df$time
1 12:20am
2 12:40am
....
.....
11 12:00pm
12 12:20pm
我有成千上万张这样的表。我在考虑写一个循环,按某种条件划出第一组,例如 df$time[i] < 12:00,或者 df$time[i] < 1:00 且 i 小于某个最小编号;但不确定是否有更高效的解决方案,能够把第一段取值范围确定为 id 1:10,第二段确定为 id 11:22。
答案 0 :(得分:0)
这应该相当快。
# Example data from the question: 22 rows of 12-hour clock times that carry
# no am/pm marker. `id` is an integer column, `time` a character column.
df <- data.frame(
  id = 1:22,
  time = c(
    "12:20", "12:40", "1:00", "1:20", "2:00", "3:00", "3:15", "4:00",
    "7:00", "11:00", "12:00", "12:20", "12:40", "1:00", "1:30", "3:00",
    "4:00", "4:30", "5:00", "5:15", "8:00", "10:00"
  ),
  stringsAsFactors = FALSE
)
# Convert "H:MM" strings to decimal hours (e.g. "3:15" -> 3.25).
#   x: character vector of times, each with an hour and a minute part
#      separated by ":".
# Returns a numeric vector with one value per element of x.
hm2dh <- function(x) {
  # One row per input string: column 1 holds the hours, column 2 the minutes.
  parts <- do.call(rbind, strsplit(x, ":"))
  hours <- as.numeric(parts[, 1])
  minutes <- as.numeric(parts[, 2])
  hours + minutes / 60
}
# Label each time "am" or "pm" from row order alone. The clock value (decimal
# hours modulo 12, so 12:xx sorts before 1:xx) rises within a half-day and
# drops when the next half-day starts. The number of drops seen so far is the
# half-day index: even -> "am", odd -> "pm".
# Drops are counted rather than multiplying the signs of the differences,
# because a zero difference (first row exactly 12:00, or two consecutive rows
# with the same time) would zero out every later product and mislabel the
# rest of the table.
ampm <- c("am", "pm")[cumsum(diff(c(0, hm2dh(df$time) %% 12)) < 0) %% 2 + 1]
df$timep <- paste0(df$time, ampm)
df
# id time timep
# 1 1 12:20 12:20am
# 2 2 12:40 12:40am
# 3 3 1:00 1:00am
# 4 4 1:20 1:20am
# 5 5 2:00 2:00am
# 6 6 3:00 3:00am
# 7 7 3:15 3:15am
# 8 8 4:00 4:00am
# 9 9 7:00 7:00am
# 10 10 11:00 11:00am
# 11 11 12:00 12:00pm
# 12 12 12:20 12:20pm
# 13 13 12:40 12:40pm
# 14 14 1:00 1:00pm
# 15 15 1:30 1:30pm
# 16 16 3:00 3:00pm
# 17 17 4:00 4:00pm
# 18 18 4:30 4:30pm
# 19 19 5:00 5:00pm
# 20 20 5:15 5:15pm
# 21 21 8:00 8:00pm
# 22 22 10:00 10:00pm
答案 1 :(得分:0)
以下是使用您发布的数据的 dplyr 方法:
# Example data: the 22 unlabelled 12-hour clock times from the question,
# parsed from one "id time" record per line (the first line is the header).
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = c(
    "id time",
    "1 12:20",
    "2 12:40",
    "3 1:00",
    "4 1:20",
    "5 2:00",
    "6 3:00",
    "7 3:15",
    "8 4:00",
    "9 7:00",
    "10 11:00",
    "11 12:00",
    "12 12:20",
    "13 12:40",
    "14 1:00",
    "15 1:30",
    "16 3:00",
    "17 4:00",
    "18 4:30",
    "19 5:00",
    "20 5:15",
    "21 8:00",
    "22 10:00"
  )
)
# Hour component of "H:MM" strings, as a number. The inner helper handles a
# single string and keeps only the text before the first ":"; Vectorize()
# applies it to every element of a character vector.
GetHrs <- Vectorize(function(x) {
  pieces <- strsplit(x, ":")[[1]]
  as.numeric(pieces[1])
})
# dplyr supplies %>%, mutate(), lag() and select() used below.
library(dplyr)

df %>%
  mutate(
    # hour part of each time
    hr = GetHrs(time),
    # counter that goes up by one each time a run of 12:xx rows begins
    # (hour is 12 and the previous row's hour was not)
    group = cumsum(hr == 12 & lag(hr, default = 0) != 12),
    # rows in the same group as the first row are AM, all later rows are PM;
    # comparing with group[1] instead of the literal 1 also covers tables
    # whose first row is not a 12:xx time (their first group is 0)
    time_upd = ifelse(group == group[1], paste0(time, "AM"), paste0(time, "PM"))
  ) %>%
  select(id, time_upd) # keep only columns of interest
# id time_upd
# 1 1 12:20AM
# 2 2 12:40AM
# 3 3 1:00AM
# 4 4 1:20AM
# 5 5 2:00AM
# 6 6 3:00AM
# 7 7 3:15AM
# 8 8 4:00AM
# 9 9 7:00AM
# 10 10 11:00AM
# 11 11 12:00PM
# 12 12 12:20PM
# 13 13 12:40PM
# 14 14 1:00PM
# 15 15 1:30PM
# 16 16 3:00PM
# 17 17 4:00PM
# 18 18 4:30PM
# 19 19 5:00PM
# 20 20 5:15PM
# 21 21 8:00PM
# 22 22 10:00PM
答案 2 :(得分:0)
我们可以把时间值去掉冒号转成数字后对 1200 取模,再找出 diff 小于零的位置作为切点。其余的操作可以用 Map 完成。
# Clock value as a number with the colon removed ("12:20" -> 1220), taken
# modulo 1200 so that 12:xx sorts before 1:xx within a half-day.
clock <- as.numeric(gsub("\\D", "", df$time)) %% 1200
# Cut points: rows where the clock value drops, i.e. a new half-day starts.
cp <- which(c(0, diff(clock)) < 0)
# Half-day index of every row = number of cut points at or before it.
# Splitting on this index, instead of slicing with 1:(cp-1) and cp:nrow(df),
# also handles tables with no cut point (everything "am") or with more than
# one (am, pm, am, ...).
half <- cumsum(seq_len(nrow(df)) %in% cp)
suffix <- c("am", "pm")[unique(half) %% 2 + 1]
# as.character() keeps the column in place when df has zero rows, where
# unlist() of an empty list would return NULL.
df$time <- as.character(
  unlist(Map(paste0, split(df$time, half), suffix), use.names = FALSE)
)
df
# id time
# 1 1 12:20am
# 2 2 12:40am
# 3 3 1:00am
# 4 4 1:20am
# 5 5 2:00am
# 6 6 3:00am
# 7 7 3:15am
# 8 8 4:00am
# 9 9 7:00am
# 10 10 11:00am
# 11 11 12:00pm
# 12 12 12:20pm
# 13 13 12:40pm
# 14 14 1:00pm
# 15 15 1:30pm
# 16 16 3:00pm
# 17 17 4:00pm
# 18 18 4:30pm
# 19 19 5:00pm
# 20 20 5:15pm
# 21 21 8:00pm
# 22 22 10:00pm
数据
# Reproducible copy of the example data: 22 rows, integer `id` and character
# `time` (12-hour clock values without an am/pm marker).
df <- data.frame(
  id = seq_len(22L),
  time = c(
    "12:20", "12:40", "1:00", "1:20", "2:00", "3:00",
    "3:15", "4:00", "7:00", "11:00", "12:00", "12:20",
    "12:40", "1:00", "1:30", "3:00", "4:00", "4:30",
    "5:00", "5:15", "8:00", "10:00"
  ),
  stringsAsFactors = FALSE
)