我希望把下面这个数据框:
# Build the example input data frame: 15 rows, with several (id, name) pairs
# appearing more than once.
# id and name share the same run-length layout, so both are spelled out as
# distinct values plus repeat counts.
id <- rep(
  c(1, 8, 2, 3, 9, 4, 5, 6, 7),
  times = c(3, 1, 3, 2, 1, 1, 1, 2, 1)
)
name <- rep(
  c("tim", "bill", "jack", "matt", "jen", "dave", "chris", "kate", "brad"),
  times = c(3, 1, 3, 2, 1, 1, 1, 2, 1)
)
value_1 <- c(
  50, 50, 50,
  55,
  50, 0, 50,
  88, 88,
  90, 75, 90,
  110, 110,
  65
)
df <- data.frame(id = id, name = name, value_1 = value_1)
变成下面这个数据框:
# Build the desired result: same id/name rows as the input, but within each
# repeated (id, name) pair only the first value_1 is kept and the rest are 0.
id <- rep(
  c(1, 8, 2, 3, 9, 4, 5, 6, 7),
  times = c(3, 1, 3, 2, 1, 1, 1, 2, 1)
)
name <- rep(
  c("tim", "bill", "jack", "matt", "jen", "dave", "chris", "kate", "brad"),
  times = c(3, 1, 3, 2, 1, 1, 1, 2, 1)
)
value_1 <- c(
  50, 0, 0,
  55,
  50, 0, 0,
  88, 0,
  90, 75, 90,
  110, 0,
  65
)
final_df <- data.frame(id = id, name = name, value_1 = value_1)
我需要将 value_1 列中的重复值替换为零,但仅在满足以下条件时才替换:同一个 id 出现的次数大于 1,并且 name 相同,并且 value_1 相同;此时除其中一个值之外,其余的值全部替换为零。希望我表达清楚了。我一直没能在不丢失任何观测的前提下解决这个问题。我需要保留所有观测,因为此数据集中还有其他变量,我不能删除这些行。
感谢 R 社区的帮助。
答案 0 :(得分:2)
我们可以使用 duplicated,
将 'value_1' 中重复元素的值赋为 0
# Set value_1 to 0 on every row that repeats an earlier (id, name, value_1)
# combination; the first occurrence of each combination is left untouched.
# duplicated() is applied to the three key columns only, so any additional
# columns present in df cannot prevent a row from being recognised as a repeat.
df$value_1[duplicated(df[, c("id", "name", "value_1")])] <- 0
或使用data.table
library(data.table)
# Turn df into a data.table by reference so that := can update it in place.
setDT(df)
# For each (id, name) group, collect the row numbers of every row after the
# first; single-row groups contribute nothing.
i1 <- df[, .I[-1L], by = .(id, name)]$V1
# Overwrite value_1 with 0 on exactly those rows.
df[i1, value_1 := 0]
df
# id name value_1
# 1: 1 tim 50
# 2: 1 tim 0
# 3: 1 tim 0
# 4: 8 bill 55
# 5: 2 jack 50
# 6: 2 jack 0
# 7: 2 jack 0
# 8: 3 matt 88
# 9: 3 matt 0
#10: 9 jen 90
#11: 4 dave 75
#12: 5 chris 90
#13: 6 kate 110
#14: 6 kate 0
#15: 7 brad 65
或者使用 dplyr,
配合 case_when
library(dplyr)
# Within each (id, name) group keep value_1 on the first row and use 0 on
# every later row. The result is returned (still grouped), not assigned.
df %>%
  group_by(id, name) %>%
  mutate(
    value_1 = case_when(
      row_number() == 1L ~ value_1,
      TRUE ~ 0
    )
  )
# A tibble: 15 x 3
# Groups: id, name [9]
# id name value_1
# <dbl> <fctr> <dbl>
# 1 1.00 tim 50.0
# 2 1.00 tim 0
# 3 1.00 tim 0
# 4 8.00 bill 55.0
# 5 2.00 jack 50.0
# 6 2.00 jack 0
# 7 2.00 jack 0
# 8 3.00 matt 88.0
# 9 3.00 matt 0
#10 9.00 jen 90.0
#11 4.00 dave 75.0
#12 5.00 chris 90.0
#13 6.00 kate 110
#14 6.00 kate 0
#15 7.00 brad 65.0
答案 1 :(得分:1)
我们可以使用 ave,
按 id
和 name 分组,
将每组中除第一个值之外的所有值替换为 0。
# Group value_1 by (id, name) with ave(); inside each group every element
# after the first is overwritten with 0, and ave() puts the results back in
# the original row positions.
df$value_1 <- ave(
  df$value_1, df$id, df$name,
  FUN = function(v) {
    v[seq_along(v) > 1L] <- 0
    v
  }
)
df
# id name value_1
#1 1 tim 50
#2 1 tim 0
#3 1 tim 0
#4 8 bill 55
#5 2 jack 50
#6 2 jack 0
#7 2 jack 0
#8 3 matt 88
#9 3 matt 0
#10 9 jen 90
#11 4 dave 75
#12 5 chris 90
#13 6 kate 110
#14 6 kate 0
#15 7 brad 65
我们也可以用 dplyr
做同样的事情
library(dplyr)
# Same idea with dplyr: per (id, name) group, replace every value_1 after the
# first row with 0. The result is returned (still grouped), not assigned.
df %>%
  group_by(id, name) %>%
  mutate(value_1 = replace(value_1, row_number() > 1L, 0))