我有一个包含3523个观测值和92个变量的数据框。
下面是一个数据框示例,包含10个观测值,这里只显示04:00到05:30之间的部分时间列:
04:00 04:15 04:30 05:00 05:15 05:30
1: - - - - - -
2: 2 2 2 - 2 2
3: 2 - - 2 2 2
4: - - 2 - 2 2
5: - - - - 2 2
6: 2 - 2 2 - 2
7: - - - - 2 2
8: 2 2 - 2 2 2
9: - - - - 2 2
10: 2 2 - 2 2 2
.
各列表示从凌晨4:00到次日凌晨4:00的24小时(每15分钟一列),各行对应各个观测。
每行只包含值“-”和“2”。
我想提取每一行中值为“2”的区间的开始时间和结束时间,
例如:第2行:04:00-04:30;第3行:04:00、05:00;第4行:04:30。
我还想把输出保存到Excel或txt文件中。你能帮我吗?
答案 0 :(得分:1)
您可以像这样使用 range():
# Report, for every row of `dat`, the first and last column holding a 2.
#
# Only the outermost marked columns are returned; gaps between them are not
# split into separate intervals.
#
# @param dat A data frame whose column names are the time labels and whose
#   cells are compared against 2 (so both 2 and "2" count as marked).
# @return A data frame with columns `t0` (first marked column) and `t1` (last
#   marked column, or NA when the row has exactly one marked cell). Rows
#   without any marked cell are omitted; a zero-row data frame is returned
#   when no row has one.
fun <- function(dat) {
  hit <- dat == 2
  cols <- names(dat)
  # Iterate over rows: seq_along(dat) would count columns instead.
  L <- lapply(seq_len(nrow(dat)), function(x, ...) {
    # which() also discards NA comparisons.
    idx <- which(hit[x, ])
    if (length(idx) >= 2) {
      # Use column position, not the sorted labels: "00:15" sorts before
      # "04:00" even though it comes later in a day that starts at 04:00.
      cols[range(idx)]
    } else if (length(idx) == 1) {
      c(cols[idx], NA)
    }
  })
  L <- Filter(Negate(is.null), L)
  if (length(L) == 0) {
    return(data.frame(t0 = character(0), t1 = character(0)))
  }
  setNames(data.frame(do.call(rbind, L)), c("t0", "t1"))
}
输出结果:
> fun(df1)
t0 t1
1 04:00 04:30
2 04:00 04:45
3 04:30 <NA>
数据
# Example data: 10 observations x 4 quarter-hour columns (04:00 to 04:45).
# Cells are the strings "2" (marked) and "-" (unmarked); rows 2-4 carry the
# marks, every other row is all "-".
df1 <- setNames(
  data.frame(matrix(
    c(rep("-", 4), rep(2, 3), "-", rep(c(2, rep("-", 2)), 2), 2, rep("-", 25)),
    ncol = 4, byrow = TRUE
  )),
  # Build the labels directly so they do not depend on the session timezone.
  sprintf("04:%02d", (0:3) * 15L)
)
> df1
04:00 04:15 04:30 04:45
1 - - - -
2 2 2 2 -
3 2 - - 2
4 - - 2 -
5 - - - -
6 - - - -
7 - - - -
8 - - - -
9 - - - -
10 - - - -
答案 1 :(得分:0)
与@Mate讨论后,我已经对其进行了编辑(请参见此答案的注释):
library(tidyverse)
# Build one comma-separated string of "start-end" intervals per observation
# and write the result to my_results.csv.
dat %>%
  rownames_to_column("n") %>%
  mutate(n = as.integer(n)) %>%
  # Long format, one row per (observation, time slot). pivot_longer() replaces
  # the superseded gather(); values are coerced to character so that columns
  # of different types can still be stacked.
  pivot_longer(
    cols = -n,
    names_to = "time",
    values_to = "observation",
    values_transform = list(observation = as.character)
  ) %>%
  group_by(n) %>%
  filter(observation == "2") %>%
  summarize(
    # Marked slots are paired off in column order: odd-numbered ones are
    # starts, even-numbered ones are ends. With an odd count the last start
    # has no partner and is closed with "...".
    interval = paste(time[seq(1, n(), 2)],
                     c(time, "...")[seq(2, n() + n() %% 2, 2)],
                     sep = "-",
                     collapse = ", ")
  ) %>%
  ungroup() %>%
  arrange(n) %>%
  write_csv("my_results.csv")
# A tibble: 100 x 2
n interval
<int> <chr>
1 1 04:30-14:00, 19:30-20:15, 22:30-01:15, 03:45-...
2 2 06:15-08:00, 09:00-12:00, 13:45-16:30, 18:45-23:15, 00:30-02:15
3 3 06:00-06:30, 08:00-09:45, 11:15-13:30, 14:15-23:15, 01:00-01:30
4 4 20:00-21:15, 23:30-03:15
5 5 05:00-09:30, 10:00-10:30, 11:45-12:00, 13:15-13:30, 14:00-20:15, 20:30-21:3~
6 6 07:45-08:30, 09:15-13:15, 19:15-19:30, 20:30-20:45, 21:00-21:45, 01:45-...
7 7 09:30-17:45, 21:15-...
8 8 07:00-09:30, 12:45-18:00, 19:00-21:15, 00:15-02:00
9 9 05:45-06:15, 09:00-16:00, 17:15-19:45, 21:15-22:30, 23:00-...
10 10 10:00-10:15, 12:15-13:30, 16:15-16:45, 21:30-23:45, 00:45-01:30
# Reproducible example data: 100 observations x 96 quarter-hour slots,
# labelled "04:00" through "03:45" (wrapping past midnight).
colnms <- paste(
  sprintf("%02d", rep(c(4:23, 0:3), each = 4)),
  sprintf("%02d", rep(c(0L, 15L, 30L, 45L), times = 24)),
  sep = ":"
)
set.seed(53248604)
# Each cell is "2" with probability 0.1 and "-" otherwise; the sample size is
# derived from the number of columns instead of being hard-coded.
dat <- matrix(
  sample(c("-", 2), 100 * length(colnms), prob = c(0.9, 0.1), replace = TRUE),
  nrow = 100
)
dimnames(dat) <- list(1:100, colnms)
dat <- as.data.frame(dat)