选择时间值的第一个范围以添加AM / PM

时间:2019-06-17 12:09:31

标签: r dataframe datetime

我有一个数据框(df),其中 df$time 列包含以下时间值:

df$id   df$time
  1      12:20
  2      12:40
  3       1:00
  4       1:20
  5       2:00
  6       3:00
  7       3:15
  8       4:00
  9       7:00
 10      11:00
 11      12:00
 12      12:20
 13      12:40
 14       1:00
 15       1:30
 16       3:00
 17       4:00
 18       4:30
 19       5:00
 20       5:15
 21       8:00
 22      10:00

我想标明:第一段从 12:00 到 11:59 的时间值(id 1 到 10)属于 AM,第二段(id 11 到 22)属于 PM。

希望得到类似下面的结果:

 df$id   df$time
      1      12:20am
      2      12:40am
....
.....
     11      12:00pm
     12      12:20pm

我有成千上万张这样的表。我原本考虑写一个循环,按某种条件划出第一组,例如 df$time[i] < 12:00 或 df$time[i] < 1:00,并且 i 小于某个最小编号;但不确定是否有更高效的解决方案,能够把第一段取值范围确定为 id 1:10,第二段确定为 id 11:22。

3 个答案:

答案 0 :(得分:0)

这应该相当快。

# Sample data from the question: `id` is the row number and `time` is a
# clock reading that carries no AM/PM marker.
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "
id   time
  1      12:20
  2      12:40
  3       1:00
  4       1:20
  5       2:00
  6       3:00
  7       3:15
  8       4:00
  9       7:00
 10      11:00
 11      12:00
 12      12:20
 13      12:40
 14       1:00
 15       1:30
 16       3:00
 17       4:00
 18       4:30
 19       5:00
 20       5:15
 21       8:00
 22      10:00"
)

#' Convert "H:MM" clock strings to decimal hours.
#'
#' @param x Character vector (or factor) of times such as "12:20" or "1:00".
#' @return Numeric vector the same length as `x`: hours plus minutes / 60.
#'   Elements that lack an hour or a minute field yield `NA`.
hm2dh <- function(x) {
  parts <- strsplit(as.character(x), ":", fixed = TRUE)
  # vapply() keeps the result numeric for zero-length input, where
  # do.call(rbind, list()) gives NULL and the [, 1] subscript fails. Picking
  # fields per element also avoids rbind() silently recycling a value such
  # as "12" into both the hour and the minute column.
  field <- function(i) {
    vapply(parts, function(p) as.numeric(p[i]), numeric(1))
  }
  field(1) + field(2) / 60
}

# Fold each reading onto a 12-hour dial (12:xx becomes 0.xx) and count how
# often the value drops from one row to the next: every drop means the clock
# wrapped past 12:00. An even wrap count is "am", an odd one is "pm".
# Counting drops instead of taking cumprod(sign(diff(...))) matters when a
# difference is exactly zero (first reading is 12:00, or a reading repeats):
# the zero would stick in the running product and every later row would be
# labelled "pm".
ampm <- c("am", "pm")[
  cumsum(diff(c(0, hm2dh(df$time) %% 12)) < 0) %% 2 + 1
]

df$timep <- paste0(df$time, ampm)

df
#    id  time   timep
# 1   1 12:20 12:20am
# 2   2 12:40 12:40am
# 3   3  1:00  1:00am
# 4   4  1:20  1:20am
# 5   5  2:00  2:00am
# 6   6  3:00  3:00am
# 7   7  3:15  3:15am
# 8   8  4:00  4:00am
# 9   9  7:00  7:00am
# 10 10 11:00 11:00am
# 11 11 12:00 12:00pm
# 12 12 12:20 12:20pm
# 13 13 12:40 12:40pm
# 14 14  1:00  1:00pm
# 15 15  1:30  1:30pm
# 16 16  3:00  3:00pm
# 17 17  4:00  4:00pm
# 18 18  4:30  4:30pm
# 19 19  5:00  5:00pm
# 20 20  5:15  5:15pm
# 21 21  8:00  8:00pm
# 22 22 10:00 10:00pm

答案 1 :(得分:0)

以下是使用您发布的数据的dplyr方法:

# Example data: the table posted in the question, with `id` as the row
# number and `time` as a clock reading without an AM/PM marker.
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "
id   time
  1      12:20
  2      12:40
  3       1:00
  4       1:20
  5       2:00
  6       3:00
  7       3:15
  8       4:00
  9       7:00
 10      11:00
 11      12:00
 12      12:20
 13      12:40
 14       1:00
 15       1:30
 16       3:00
 17       4:00
 18       4:30
 19       5:00
 20       5:15
 21       8:00
 22      10:00"
)

# Extract the hour field from "H:MM" strings as a number.
# sub() is already vectorised, so no Vectorize() wrapper is needed. Unlike a
# Vectorize()-wrapped scalar function, this returns an unnamed numeric vector
# and gives numeric(0) rather than an empty list for zero-length input.
GetHrs <- function(x) {
  as.numeric(sub(":.*$", "", x))
}


library(dplyr)  # provides %>%, mutate(), lag() and select() used below

df %>%
  mutate(
    # hour field of each reading, folded so that 12 sorts before 1..11
    hr = GetHrs(time) %% 12,
    # count the rows where the folded hour drops, i.e. the clock wrapped past
    # 12:00; rows before the first wrap are group 0 whether or not the table
    # starts at 12:xx (keying on "a 12 appears" would give a table starting
    # at 1:00 the labels the wrong way round)
    group = cumsum(hr < lag(hr, default = 0)),
    # first range is AM, everything after the first wrap is PM
    time_upd = ifelse(group == 0, paste0(time, "AM"), paste0(time, "PM"))
  ) %>%
  select(id, time_upd)  # keep only the columns of interest

#    id time_upd
# 1   1  12:20AM
# 2   2  12:40AM
# 3   3   1:00AM
# 4   4   1:20AM
# 5   5   2:00AM
# 6   6   3:00AM
# 7   7   3:15AM
# 8   8   4:00AM
# 9   9   7:00AM
# 10 10  11:00AM
# 11 11  12:00PM
# 12 12  12:20PM
# 13 13  12:40PM
# 14 14   1:00PM
# 15 15   1:30PM
# 16 16   3:00PM
# 17 17   4:00PM
# 18 18   4:30PM
# 19 19   5:00PM
# 20 20   5:15PM
# 21 21   8:00PM
# 22 22  10:00PM

答案 2 :(得分:0)

我们可以将时间值转换为模1200,以找到diff小于零的切点。其余的操作可以在Map中完成。

# Read each "H:MM" string as the integer HMM and fold it onto a 12-hour dial
# (12:20 -> 1220 -> 20). Values then rise within a half-day, and `cp` holds
# the row(s) where the value drops, i.e. where the clock wrapped past 12:00.
cp <- which(diff(c(0, as.numeric(gsub("\\D", "", df$time)) %% 1200)) < 0)
# Rows from the first cut point onward are "pm". min(cp, Inf) makes the
# comparison all FALSE when there is no wrap, instead of 1:(cp - 1) failing
# on an empty `cp`. paste0() is vectorised, so the two halves do not need to
# be sliced out and recombined with Map().
df$time <- paste0(
  df$time,
  ifelse(seq_len(nrow(df)) >= min(cp, Inf), "pm", "am")
)
df
#    id    time
# 1   1 12:20am
# 2   2 12:40am
# 3   3  1:00am
# 4   4  1:20am
# 5   5  2:00am
# 6   6  3:00am
# 7   7  3:15am
# 8   8  4:00am
# 9   9  7:00am
# 10 10 11:00am
# 11 11 12:00pm
# 12 12 12:20pm
# 13 13 12:40pm
# 14 14  1:00pm
# 15 15  1:30pm
# 16 16  3:00pm
# 17 17  4:00pm
# 18 18  4:30pm
# 19 19  5:00pm
# 20 20  5:15pm
# 21 21  8:00pm
# 22 22 10:00pm

数据

# Sample data: row ids and clock readings that carry no AM/PM marker.
df <- data.frame(
  id = 1:22,
  time = c(
    "12:20", "12:40", "1:00", "1:20", "2:00", "3:00", "3:15", "4:00",
    "7:00", "11:00", "12:00", "12:20", "12:40", "1:00", "1:30", "3:00",
    "4:00", "4:30", "5:00", "5:15", "8:00", "10:00"
  ),
  stringsAsFactors = FALSE
)