在R

时间:2017-03-16 02:51:38

标签: r

我有一个ID和时间戳的数据集。

每个 ID 通常每分钟有 12 条时间戳记录,但时间只精确到分钟。我想在同一分钟内为这些记录依次加上间隔 5 秒的秒数(:00、:05、:10 ……),却不知道该如何实现。

Actual Data                     Desired Format

    ID       Time           ID        Time     
    4466    12/1/14 19:56   4466    12/1/14 19:56:00
    4466    12/1/14 19:56   4466    12/1/14 19:56:05
    4466    12/1/14 19:56   4466    12/1/14 19:56:10
    4466    12/1/14 19:56   4466    12/1/14 19:56:15
    4466    12/1/14 19:56   4466    12/1/14 19:56:20
    4466    12/1/14 19:56   4466    12/1/14 19:56:25
    4466    12/1/14 19:56   4466    12/1/14 19:56:30
    4466    12/1/14 19:56   4466    12/1/14 19:56:35
    4466    12/1/14 19:56   4466    12/1/14 19:56:40
    4466    12/1/14 19:56   4466    12/1/14 19:56:45
    4466    12/1/14 19:56   4466    12/1/14 19:56:50
    4466    12/1/14 19:56   4466    12/1/14 19:56:55
    1136    3/23/15 23:00   1136    3/23/15 23:00:00
    1136    3/23/15 23:00   1136    3/23/15 23:00:05
    1136    3/23/15 23:00   1136    3/23/15 23:00:10
    1136    3/23/15 23:01   1136    3/23/15 23:01:00
    1136    3/23/15 23:01   1136    3/23/15 23:01:05
    1136    3/23/15 23:01   1136    3/23/15 23:01:10
    1136    3/23/15 23:01   1136    3/23/15 23:01:15
    1136    3/23/15 23:01   1136    3/23/15 23:01:20
    1136    3/23/15 23:01   1136    3/23/15 23:01:25
    1136    3/23/15 23:01   1136    3/23/15 23:01:30
    1136    3/23/15 23:01   1136    3/23/15 23:01:35
    1136    3/23/15 23:01   1136    3/23/15 23:01:40
    1136    3/23/15 23:01   1136    3/23/15 23:01:45
    1136    3/23/15 23:01   1136    3/23/15 23:01:50
    1136    3/23/15 23:01   1136    3/23/15 23:01:55

3 个答案:

答案 0 :(得分:1)

我们可以使用 data.table 来完成。先用 `setDT(df1)` 将 'data.frame' 转换为 'data.table',按 'ID' 和 'Time' 分组,再用 `sprintf` 把 'Time' 与以 5 秒为步长生成的序列拼接起来,创建 'NewTime' 列:

    library(data.table)
    setDT(df1)[, NewTime := sprintf("%s:%02d", Time, seq(0, 55, by = 5)[seq_len(.N)]), .(ID, Time)]
    df1
    #      ID          Time          NewTime
    # 1: 4466 12/1/14 19:56 12/1/14 19:56:00
    # 2: 4466 12/1/14 19:56 12/1/14 19:56:05
    # 3: 4466 12/1/14 19:56 12/1/14 19:56:10
    # 4: 4466 12/1/14 19:56 12/1/14 19:56:15
    # 5: 4466 12/1/14 19:56 12/1/14 19:56:20
    # 6: 4466 12/1/14 19:56 12/1/14 19:56:25
    # 7: 4466 12/1/14 19:56 12/1/14 19:56:30
    # 8: 4466 12/1/14 19:56 12/1/14 19:56:35
    # 9: 4466 12/1/14 19:56 12/1/14 19:56:40
    #10: 4466 12/1/14 19:56 12/1/14 19:56:45
    #11: 4466 12/1/14 19:56 12/1/14 19:56:50
    #12: 4466 12/1/14 19:56 12/1/14 19:56:55
    #13: 1136 3/23/15 23:00 3/23/15 23:00:00
    #14: 1136 3/23/15 23:00 3/23/15 23:00:05
    #15: 1136 3/23/15 23:00 3/23/15 23:00:10
    #16: 1136 3/23/15 23:01 3/23/15 23:01:00
    #17: 1136 3/23/15 23:01 3/23/15 23:01:05
    #18: 1136 3/23/15 23:01 3/23/15 23:01:10
    #19: 1136 3/23/15 23:01 3/23/15 23:01:15
    #20: 1136 3/23/15 23:01 3/23/15 23:01:20
    #21: 1136 3/23/15 23:01 3/23/15 23:01:25
    #22: 1136 3/23/15 23:01 3/23/15 23:01:30
    #23: 1136 3/23/15 23:01 3/23/15 23:01:35
    #24: 1136 3/23/15 23:01 3/23/15 23:01:40
    #25: 1136 3/23/15 23:01 3/23/15 23:01:45
    #26: 1136 3/23/15 23:01 3/23/15 23:01:50
    #27: 1136 3/23/15 23:01 3/23/15 23:01:55

# Sample data: 27 readings with minute-resolution timestamps.
# ID 4466 has 12 readings in a single minute; ID 1136 has 3 readings in
# one minute followed by 12 readings in the next.
df1 <- data.frame(
  ID = rep(c(4466L, 1136L), times = c(12L, 15L)),
  Time = rep(
    c("12/1/14 19:56", "3/23/15 23:00", "3/23/15 23:01"),
    times = c(12L, 3L, 12L)
  ),
  stringsAsFactors = FALSE
)

数据(即上文 `df1 <- structure(...)` 的定义)

答案 1 :(得分:1)

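您可以使用 dplyr 包中的 `row_number()`:

    library(dplyr)
    df %>% group_by(ID,Time ) %>% mutate(NTime=sprintf("%s.%02d",Time, row_number(Time)*5))

注意:按上面的写法,分隔符是 "." 而不是 ":",并且秒数从 05 开始、到 60 结束;若要得到问题中期望的 :00 到 :55,应把格式改为 `"%s:%02d"`,并把秒数改为 `(row_number() - 1) * 5`。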

npycurses.ActionFormV2

答案 2 :(得分:0)

或者只是:

# Expand every row of `data` into one row per second offset within its minute.
#
# Args:
#   data: data frame with a `Time` column holding minute-resolution
#     timestamps formatted as "%m/%d/%y %H:%M" (e.g. "12/1/14 19:56").
#   step: spacing between the generated offsets, in whole seconds.
#
# Returns:
#   `data` with each row repeated once per offset, plus a POSIXct column
#   `ntime` (UTC) holding the row's timestamp with the offset as seconds.
expand_seconds <- function(data, step = 5) {
  offsets <- seq(0, 59, by = step)
  # Repeat each row index once per offset and index `Time` the same way, so
  # timestamps stay aligned with their rows when `data` has several rows.
  # seq_len() also keeps an empty `data` empty (1:0 would yield c(1, 0)).
  row_idx <- rep(seq_len(nrow(data)), each = length(offsets))
  # sprintf() returns character(0) for zero-length input, unlike paste0(),
  # which would still emit a lone ":".
  stamps <- sprintf(
    "%s:%02d",
    as.character(data$Time)[row_idx],
    rep(offsets, times = nrow(data))
  )
  cbind(
    data[row_idx, , drop = FALSE],
    ntime = as.POSIXct(stamps, format = "%m/%d/%y %H:%M:%S", tz = "UTC")
  )
}

df <- data.frame(ID = 4466, Time = "12/1/14 19:56")
df2 <- expand_seconds(df)