我有以下数据集
# Sample data from the question: two records sharing one id and one date of
# birth, differing only in the recorded reason.
Data <- data.frame(
  id = rep(1, 2),
  dob = rep("06/06/2018", 2),
  reason = c("age", "disability")
)
对于重复出现的 ID,我需要删除其中原因(reason)为 "age" 的那些行。
我尝试了以下代码,但它不起作用:
# NOTE: this is the asker's non-working attempt, kept verbatim. Why it fails:
# - `data` (lower case) is not the `Data` object created above; R is
#   case-sensitive.
# - `duplicate` is not a base R function; the base function is `duplicated()`.
# - `id` and `reason` are bare names, so they are not looked up as columns of
#   the data frame.
# - `&` is applied to the integer positions returned by which(), not to the
#   logical conditions themselves.
Final_data = data[which(! duplicate (id)) & which( reason=="age"),]
有人可以帮帮我吗?我是 R 的新手。
答案 0 :(得分:0)
`which` 返回的是整数位置,而不是逻辑向量。这里并不需要 `which`:`&` 可以直接作用于逻辑条件。
# `&` combines the two logical vectors element-wise, so which() is not needed:
# keep rows that are the first occurrence of their id and whose reason is "age".
with(Data, Data[!duplicated(id) & reason == "age", ])
# id dob reason
#1 1 06/06/2018 age
基于更新后的数据(Data 的定义见下文):
library(dplyr)
# Within each id, drop the rows whose reason is "age", but keep an id's row
# when it is the only row for that id (n() == 1), even if its reason is "age".
# The result is still grouped by id, as the printed header below shows.
Data %>%
  group_by(id) %>%
  filter(reason != "age" | n() == 1)
# A tibble: 3 x 3
# Groups: id [3]
# id dob reason
# <dbl> <chr> <chr>
#1 1.00 06/08/2018 disability
#2 2.00 08/08/1992 disability
#3 3.00 09/01/1995 age
或者使用 data.table:
library(data.table)
# Convert Data to a data.table by reference, then per id keep the non-"age"
# rows, or the single row when that id occurs only once (.N == 1).
setDT(Data)[, .SD[reason != "age" | .N == 1L], by = id]
# Updated example data used by the dplyr / data.table solutions: ids 1 and 2
# each have an "age" and a "disability" record; id 3 has a single "age" record.
Data <- data.frame(
  id = rep(c(1, 2, 3), times = c(2, 2, 1)),
  dob = c(rep(c("06/08/2018", "08/08/1992"), each = 2), "09/01/1995"),
  reason = c(rep(c("age", "disability"), times = 2), "age"),
  stringsAsFactors = FALSE
)
答案 1 :(得分:0)
在base-R中:
# Drop the "age" rows of every id that occurs more than once; ids that occur
# only once keep their row whatever the reason.
# Duplicates are detected on `id` (the key named in the question) rather than
# on `dob`, so two different ids that share a date of birth are not treated
# as duplicates of each other.
dup_ids <- Data$id[duplicated(Data$id)]
Data[!(Data$id %in% dup_ids & Data$reason == "age"), ]
# id dob reason
# 2 1 06/08/2018 disability
# 4 2 08/08/1992 disability
# 5 3 09/01/1995 age
数据:
# Example data for the base-R answer: ids 1 and 2 each have an "age" and a
# "disability" record; id 3 has a single "age" record.
# stringsAsFactors = FALSE keeps dob/reason as character vectors regardless of
# the R version (the default was TRUE before R 4.0).
Data <- data.frame(
  id = c(1, 1, 2, 2, 3),
  dob = c("06/08/2018", "06/08/2018", "08/08/1992", "08/08/1992", "09/01/1995"),
  reason = c("age", "disability", "age", "disability", "age"),
  stringsAsFactors = FALSE
)