我有一个由两列组成的数据框:id 和 date_time。id 表示被记录的人,date_time 表示记录的时间。请参见下面的数据框。
从这个数据框中,我想计算一个新的数据框,其中包含三列:“ID”、“Date”和“Measurement”。
每个 ID 的每个不同日期都应有一行,Measurement 是该日期在该 ID 内的序号(见下面的预期输出)。
# Sample data as a dput()-style literal: a 46-row data.frame with two columns,
# `date_time` (character timestamps written as "YYYY.MM.DD HH:MM:SS") and
# `id` (integer; the values present are 12 and 13).
structure(list(date_time = c("2020.03.02 22:00:17", "2020.03.02 22:05:17",
"2020.03.02 22:10:17", "2020.03.02 22:35:17", "2020.03.02 22:40:17",
"2020.03.02 22:45:17", "2020.03.02 22:50:17", "2020.03.02 22:55:17",
"2020.03.02 23:00:17", "2020.03.02 23:05:17", "2020.03.02 23:10:17",
"2020.03.02 23:15:17", "2020.03.02 23:20:17", "2020.03.02 23:25:17",
"2020.03.02 23:30:17", "2020.03.02 23:35:17", "2020.03.02 23:40:17",
"2020.03.02 23:45:17", "2020.03.02 23:50:17", "2020.03.02 23:55:17",
"2020.03.03 00:00:17", "2020.03.03 00:55:17", "2020.03.03 01:00:17",
"2020.03.03 01:05:17", "2020.03.03 01:10:17", "2020.03.03 01:15:17",
"2020.03.03 01:20:17", "2020.03.03 01:25:17", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32"), id = c(12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L)), row.names = c(NA, 46L), class = "data.frame")
# Expected output: one row per ID/date combination, with Measurement counting
# the dates within each ID.
output <- read.table(
  text = "
ID Date Measurement
12 2020.03.02 1
12 2020.03.03 2
13 2020.05.09 1
",
  header = TRUE
)
我是R的新手,并尝试使用tidyverse。 非常感谢所有帮助,谢谢!
答案 0 :(得分:1)
我敢肯定有更好的方法,但是.....
library(tidyverse)
# Rebuild the question's sample data from its dput()-style literal: 46 rows,
# a character `date_time` column ("YYYY.MM.DD HH:MM:SS") and an integer `id`
# column. The structure() call already carries class "data.frame"; the outer
# data.frame() call just wraps it.
df <- data.frame(
structure(list(date_time = c("2020.03.02 22:00:17", "2020.03.02 22:05:17",
"2020.03.02 22:10:17", "2020.03.02 22:35:17", "2020.03.02 22:40:17",
"2020.03.02 22:45:17", "2020.03.02 22:50:17", "2020.03.02 22:55:17",
"2020.03.02 23:00:17", "2020.03.02 23:05:17", "2020.03.02 23:10:17",
"2020.03.02 23:15:17", "2020.03.02 23:20:17", "2020.03.02 23:25:17",
"2020.03.02 23:30:17", "2020.03.02 23:35:17", "2020.03.02 23:40:17",
"2020.03.02 23:45:17", "2020.03.02 23:50:17", "2020.03.02 23:55:17",
"2020.03.03 00:00:17", "2020.03.03 00:55:17", "2020.03.03 01:00:17",
"2020.03.03 01:05:17", "2020.03.03 01:10:17", "2020.03.03 01:15:17",
"2020.03.03 01:20:17", "2020.03.03 01:25:17", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32", "2020.05.09 08:39:32",
"2020.05.09 08:39:32", "2020.05.09 08:39:32"), id = c(12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L)), row.names = c(NA, 46L), class = "data.frame")
)
# Reduce each timestamp to its calendar date, keep one row per (id, date),
# then number the dates within each id in order of first appearance.
#
# distinct() replaces `group_by(id) %>% summarise(date = unique(date_time))`:
# a summarise() that returns several rows per group is deprecated since
# dplyr 1.1.0, and its replacement reframe() drops the grouping, which would
# turn the per-id counter into a global one. Grouping explicitly right before
# the counter keeps the numbering per id.
df %>%
  mutate(date = anytime::anydate(date_time)) %>%
  distinct(id, date) %>%
  group_by(id) %>%
  # Kept as double so the column type matches the original cumsum() result.
  mutate(Measurement = as.numeric(row_number()))
# A tibble: 3 x 3
# Groups: id [2]
id date Measurement
<int> <date> <dbl>
1 12 2020-03-02 1
2 12 2020-03-03 2
3 13 2020-05-09 1
答案 1 :(得分:1)
假设df1是您的数据框,另一种方法是...
# Assign every reading in df1 to a date and number the dates within each id.
# Readings taken at or after 23:00:00 are attributed to the following day
# (the 82800-second cut-off below); all earlier readings keep their own date.
# Side effects: adds the helper columns dateTime, mydate, tm and dts to df1.
# Result: df2, grouped by id, with columns id, date (character), measurement.
library(dplyr)      # %>%, mutate(), case_when(), distinct(), group_by(), row_number()
library(lubridate)  # as_datetime()

df1$dateTime <- as_datetime(df1$date_time, format = "%Y.%m.%d %H:%M:%S")
df1$mydate <- as.Date(df1$date_time, format = "%Y.%m.%d %H:%M:%S")
# Both values are in seconds, so `tm - dts` is the time elapsed since the
# start of `mydate`.
df1$tm <- as.numeric(df1$dateTime)
df1$dts <- 86400 * as.numeric(df1$mydate)

# The original grouped by (id, mydate) before calling base transform(), but
# transform() rebuilds a plain data frame and discards that grouping, so the
# row-wise classification is done with an ungrouped mutate() instead.
df2 <- df1 %>%
  mutate(
    date = case_when(
      (dts - 3600) < tm & tm < (dts + 82800) ~ as.character(mydate),
      (dts + 82800) <= tm ~ as.character(mydate + 1)
    )
  ) %>%
  distinct(id, date) %>%
  group_by(id) %>%
  mutate(measurement = row_number())
df2
>df2
# A tibble: 3 x 3
# Groups: id [2]
id date measurement
<int> <chr> <int>
1 12 2020-03-02 1
2 12 2020-03-03 2
3 13 2020-05-09 1