说我有一个df:
df <- data.frame(flag = c(rep(0, 20)),
include = c(rep(1, 20)))
df[c(4,8,16), ]$flag <- 1
df
flag include
1 0 1
2 0 1
3 0 1
4 1 1
5 0 1
6 0 1
7 0 1
8 1 1
9 0 1
10 0 1
11 0 1
12 0 1
13 0 1
14 0 1
15 0 1
16 1 1
17 0 1
18 0 1
19 0 1
20 0 1
如果行位于include
行的+/-两行内,我希望将flag == 1
标志更改为0。结果如下:
flag include
1 0 1
2 0 0
3 0 0
4 1 1
5 0 0
6 0 0
7 0 0
8 1 1
9 0 0
10 0 0
11 0 1
12 0 1
13 0 1
14 0 0
15 0 0
16 1 1
17 0 0
18 0 0
19 0 1
20 0 1
我想到了一些“创新”(阅读:效率低下且过于复杂)的方法,但我认为必须有一种简单的方法让我忽视。
如果答案是这样我可以将其推广到+/- n
行,那会很好,因为我有更多的数据,并且可能会在+/- 10行内进行搜索......
答案 0 :(得分:3)
data.table
的另一个选项:
library(data.table)
n = 2
# find the row number where flag is one
flag_one = which(df$flag == 1)
# find the index where include needs to be updated
idx = setdiff(outer(flag_one, -n:n, "+"), flag_one)
# update include in place
setDT(df)[idx[idx >= 1 & idx <= nrow(df)], include := 0][]
# or as @Frank commented the last step with base R would be
# df$include[idx[idx >= 1 & idx <= nrow(df)]] = 0
# flag include
# 1: 0 1
# 2: 0 0
# 3: 0 0
# 4: 1 1
# 5: 0 0
# 6: 0 0
# 7: 0 0
# 8: 1 1
# 9: 0 0
#10: 0 0
#11: 0 1
#12: 0 1
#13: 0 1
#14: 0 0
#15: 0 0
#16: 1 1
#17: 0 0
#18: 0 0
#19: 0 1
#20: 0 1
输入功能:
update_n <- function(df, n) {
flag_one = which(df$flag == 1)
idx = setdiff(outer(flag_one, -n:n, "+"), flag_one)
df$include[idx[idx >= 1 & idx <= nrow(df)]] = 0
df
}
答案 1 :(得分:2)
必须有另一种更简单的方法,但我能想到的第一种方法是使用sapply
和which
df$include[sapply(which(df$flag == 1) , function(x) c(x-2, x-1, x+1, x+2))] <- 0
df
# flag include
#1 0 1
#2 0 0
#3 0 0
#4 1 1
#5 0 0
#6 0 0
#7 0 0
#8 1 1
#9 0 0
#10 0 0
#11 0 1
#12 0 1
#13 0 1
#14 0 0
#15 0 0
#16 1 1
#17 0 0
#18 0 0
#19 0 1
#20 0 1
我们首先找出flag
为1的所有索引,然后围绕每个索引创建所需的数字序列,并将include
的索引转换为0.
对于变量n
,我们可以
n = 2
df$include[sapply(which(df$flag == 1),function(x) setdiff(seq(x-n, x+n),x))] <- 0
答案 2 :(得分:1)
n
对于replace(x = df$include,
list = sapply(1:NROW(df), function(i)
any(df$flag[c(max(1, i-n):max(1, i-1),
min(i+1, NROW(df)):min(i+n, NROW(df)))] == 1)), values = 0)
行,
{
"entry": [
{
"blockNumber": "1941794",
"blockHash": "0x41ee74e34cbf9ef4116febea958dbc260e2da3a6bf6f601bfaeb2cd9ab944a29",
"hash": "0xf2b5b8fb173e371cbb427625b0339f6023f8b4ec3701b7a5c691fa9cef9daf63",
"from": "0x3c0cbb196e3847d40cb4d77d7dd3b386222998d9",
"to": "0x2ba24c66cbff0bda0e3053ea07325479b3ed1393",
"gas": "121000",
"gasUsed": "21000",
"gasPrice": "20000000000",
"input": "",
"logs": [],
"nonce": "14",
"value": "0x24406420d09ce7440000",
"timestamp": "2016-07-24 20:28:11 UTC"
},
{
"blockNumber": "1941716",
"blockHash": "0x75e1602cad967a781f4a2ea9e19c97405fe1acaa8b9ad333fb7288d98f7b49e3",
"hash": "0xf8f2a397b0f7bb1ff212b6bcc57e4a56ce3e27eb9f5839fef3e193c0252fab26",
"from": "0xa0480c6f402b036e33e46f993d9c7b93913e7461",
"to": "0xb2ea1f1f997365d1036dd6f00c51b361e9a3f351",
"gas": "121000",
"gasUsed": "21000",
"gasPrice": "20000000000",
"input": "",
"logs": [],
"nonce": "1",
"value": "0xde0b6b3a7640000",
"timestamp": "2016-07-24 20:12:17 UTC"
}
]
}
答案 3 :(得分:1)
另一种方法是使用zoo::rollapply
。要确定行是否在flag == 1
行的+/-两行内,我们会检查窗口中的最大flag
是否为1。
我们需要rollapply
而不是rollmax
,因为我们需要指定partial = T
。
is_within_flag_window <- function(flag, n) {
zoo::rollapply(flag, width = (2 * n) + 1, partial = T, FUN = max) == 1
}
df %>%
mutate(include = ifelse(flag == 1, 1,
ifelse(is_within_flag_window(flag, 2), 0,
1)))