我想生成列 `desired_output`,其值根据 `value` 列在 `grp_1` 和 `grp_2` 分组内的取值确定,规则如下:
- 如果组内 `value` 的值全部唯一(没有任何重复),则结果应为 NA;
- 如果某个值的重复次数多于其他任何值,则整个组都取该重复值;
- 如果各个值的重复次数相同,则整个组都取其中的最大值(MAX)。
# Example data: a single outer group ("A") split into five inner groups
# ("a" has 5 rows, "b" through "e" have 4 rows each).
grp_1 <- rep("A", times = 21)
grp_2 <- rep(c("a", "b", "c", "d", "e"), times = c(5, 4, 4, 4, 4))
value <- c(
  1, 2, 3, 3, 4, # group a
  1, 2, 3, 4,    # group b
  1, 1, 2, 2,    # group c
  1, 2, 4, 4,    # group d
  1, 3, 3, 3     # group e
)
# Expected result per row; every row of a group carries the same value.
desired_output <- c(rep(3, 5), rep(NA, 4), rep(2, 4), rep(4, 4), rep(3, 4))
df <- data.frame(grp_1, grp_2, value, desired_output)
我在获得重复值计数
# (Question text that was fused onto this line by extraction, roughly:
# "...and this is where I got stuck.")
#
# Number every element by its position inside its run of consecutive equal
# values, e.g. c(1, 1, 2, 3, 3, 3) -> 1 2 1 1 2 3.
#
# x: an atomic vector; runs of equal neighbours are detected with rle().
# Returns an integer vector with one counter per element of `x`.
func <- function(x) {
  # sequence() concatenates seq_len(n) for every run length n.
  sequence(rle(x)$lengths)
}
# Group the rows by both keys; `df` itself is replaced by the grouped version.
df <- df %>% group_by(grp_1, grp_2)
# Add `common`: the result of func(value), evaluated per group and coerced to
# numeric.
df_1 <- df %>% mutate(common = as.numeric(func(value)))
提前致谢
答案 0 :(得分:0)
希望这有帮助!
library(dplyr)
library(modeest)
# Pick the single value that a whole group should be labelled with:
#   * every value occurs exactly once -> NA
#   * otherwise -> the most frequent value; when several values tie for most
#     frequent (including the case where all repeat equally often), the
#     largest of the tied values
#
# value: numeric vector holding the values of one group.
# Returns a length-1 vector of the same type as `value`.
most_repeated_value <- function(value) {
  distinct_vals <- unique(value)
  # Occurrence count of each distinct value, aligned with `distinct_vals`.
  counts <- tabulate(match(value, distinct_vals), nbins = length(distinct_vals))
  if (all(counts == 1L)) {
    # Indexing with NA yields a missing value of the same type as `value`,
    # so the result column keeps one consistent type across groups.
    return(distinct_vals[NA_integer_])
  }
  max(distinct_vals[counts == max(counts)])
}

final_df <- df %>%
  group_by(grp_1, grp_2) %>%
  # The length-1 result is recycled to every row of the group.
  mutate(desired_output = most_repeated_value(value)) %>%
  as.data.frame()
final_df
输出是:
grp_1 grp_2 value desired_output
1 A a 1 3
2 A a 2 3
3 A a 3 3
4 A a 3 3
5 A a 4 3
6 A b 1 NA
7 A b 2 NA
8 A b 3 NA
9 A b 4 NA
10 A c 1 2
11 A c 1 2
12 A c 2 2
13 A c 2 2
14 A d 1 4
15 A d 2 4
16 A d 4 4
17 A d 4 4
18 A e 1 3
19 A e 3 3
20 A e 3 3
21 A e 3 3
# Sample data: the three input columns, with both grouping columns stored as
# factors. The expression is not assigned, so evaluating it prints the frame.
data.frame(
  grp_1 = factor(rep("A", times = 21)),
  grp_2 = factor(rep(c("a", "b", "c", "d", "e"), times = c(5, 4, 4, 4, 4))),
  value = c(
    1, 2, 3, 3, 4,
    1, 2, 3, 4,
    1, 1, 2, 2,
    1, 2, 4, 4,
    1, 3, 3, 3
  )
)
答案 1 :(得分:0)
如果有人更喜欢 data.table,可以这样写:
# Turn `df` into a data.table so that `:=` can add a column per group.
data.table::setDT(df)
# For every (grp_1, grp_2) group, label all rows with:
#   * NA when every value occurs exactly once
#   * otherwise the most frequent value, taking the largest one on ties
# Counting occurrences (rather than taking the max of the duplicated values)
# keeps the result correct when a smaller value repeats more often, and
# avoids calling max() on an empty vector for groups without duplicates.
df[, desired_outcome := {
  distinct_vals <- unique(value)
  # Occurrence count of each distinct value, aligned with `distinct_vals`.
  counts <- tabulate(match(value, distinct_vals), nbins = length(distinct_vals))
  if (all(counts == 1L)) {
    # NA of the same type as `value`, so all groups return one column type.
    distinct_vals[NA_integer_]
  } else {
    max(distinct_vals[counts == max(counts)])
  }
}, by = c("grp_1", "grp_2")]