R:基于多个ifelse条件创建新变量

时间:2018-01-24 16:22:57

标签: r if-statement dplyr

我有一个数据框users_full,我想在其中创建一个新变量party_followers,它包含以下类别:AFD,SPD,CDU,FDP,Linke,Multiple或其他

我想通过将users_full$user_id与各个关注者数据框(例如afd_followers、spd_followers等)中的user_id变量相匹配,来确定新变量的类别。

问题是users_full中的同一个user_id可能同时是多个政党的关注者,我不知道如何用ifelse语句处理这种情况。

我尝试了以下内容,但它无法正常工作。

# Attempted solution from the question -- kept exactly as posted; it does NOT
# work.
# NOTE(review): ifelse() takes exactly three arguments (test, yes, no), but
# this call passes six condition/label pairs plus "Other", so R rejects the
# extra arguments.
# NOTE(review): `==` / `!=` compare user_id with each followers column element
# by element (with recycling); they do not test whether an id occurs anywhere
# in that column -- `%in%` does that.
# NOTE(review): in the last condition only the first term is a comparison; the
# remaining terms are bare user_id columns combined with `&`.
# NOTE(review): fdp_followers is not defined in the sample data shown below.
mutate(users_full, party_followers = ifelse(user_id == afd_followers$user_id & user_id != cdu_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "AfD",
                                user_id == cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "CDU",
                                user_id == spd_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "SPD",
                                user_id == linke_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != fdp_followers$user_id, "Linke",
                                user_id == fdp_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id, "FDP",
                                user_id == afd_followers$user_id & cdu_followers$user_id & spd_followers$user_id & linke_followers$user_id & fdp_followers$user_id, "Multiple", "Other"))

下面,我重建了我正在使用的数据帧的样本。

# Sample data for the question: one data frame of users to classify and one
# data frame of follower ids per party.
#
# The ids are identifiers, not quantities, so they are stored as character
# strings; the 18-digit ones are also too large for a double to hold exactly.
# stringsAsFactors = FALSE makes user_id a character column on every R version
# (before R 4.0, data.frame() converted strings to factors by default).
users_full <- data.frame(
  user_id = c(
    "3854371132", "883470465498587138", "145216962", "2223089418",
    "2861583057", "271413649"
  ),
  stringsAsFactors = FALSE
)

spd_followers <- data.frame(
  user_id = c(
    "145216962", "3864655101", "757305123165069312", "4854498122",
    "1201495387", "565422099"
  ),
  stringsAsFactors = FALSE
)

afd_followers <- data.frame(
  user_id = c(
    "3854371132", "883470465498587138", "845969869778685952", "3864655101",
    "757305123165069312", "793677341042044928"
  ),
  stringsAsFactors = FALSE
)

cdu_followers <- data.frame(
  user_id = c(
    "3854371132", "145216962", "3864655101", "757305123165069312",
    "3207639056", "4854498122"
  ),
  stringsAsFactors = FALSE
)

linke_followers <- data.frame(
  user_id = c(
    "47289872", "1044855103", "565082298",
    "956148596042330112", "2490464967", "956147739951329280"
  ),
  stringsAsFactors = FALSE
)

我想得到以下输出:

user_id party_followers
883470465498587000  AfD
3854371132  Multiple
1044855103  Linke
757305123165069000  Multiple
3207639056  SPD
947682953   Other

3 个答案:

答案 0 :(得分:2)

为了在不复制/粘贴的情况下进行扩展(以防您还有其他follower数据框),我会把所有关注者数据框放进一个list中,将它们合并为单个数据框,然后使用merge。

使用data.table:

# Answer 0: label every user by the follower tables they appear in, using
# data.table. setkey(), setDT() and := all come from data.table, so the
# package has to be attached before they are called.
library(data.table)

# One named entry per party; rbindlist() copies these names into the id column.
followers <- list(
  spd = spd_followers,
  afd = afd_followers,
  cdu = cdu_followers,
  linke = linke_followers
)

# Stack all follower tables into one; party_followers holds the list name.
foll <- rbindlist(followers, idcol = "party_followers")
# Convert the ids to character first and key afterwards: rewriting a key
# column with := drops the key.
foll[, user_id := as.character(user_id)]
setkey(foll, "user_id")

# setDT() turns users_full into a data.table by reference.
setDT(users_full)
users_full[, user_id := as.character(user_id)]
setkey(users_full, "user_id")

# n = number of follower tables each user appears in.
foll[, n := .N, by = user_id]
foll[n > 1, party_followers := "multiple"]
# The rows of a multi-party user are now identical, so unique() keeps one.
foll <- unique(foll)

merge(users_full, foll, all = TRUE)
#       user_id party_followers  n
# 1: 1044855103           linke  1
# 2: 1201495387             spd  1
# 3:  145216962        multiple  2
# 4: 2223089418              NA NA
# 5: 2490464967           linke  1
# 6:  271413649              NA NA
# 7: 2861583057              NA NA
# 8: 3207639056             cdu  1
# ...

# Which merge() behaviour is wanted is not clear from the question:
#   all = TRUE   includes every user found in any of the data frames;
#   all.x = TRUE restricts the result to the users in users_full;
#   all = FALSE  is an inner join: only users that are in users_full and in
#                at least one of the follower data frames.
# The n column (number of follower tables each user appears in) is left in;
# drop it if it is not needed.

答案 1 :(得分:1)

这是一种仅使用基础R的函数式方法,只需向关注者列表和政党名称中添加条目即可轻松扩展。过去我用的是sapply(),但在函数内部建议使用vapply(),因为其输出格式更可预测。

# Return the party label for a single user id.
#
# user_id:   one user id to look up.
# id_list:   list of data frames, each with a user_id column of followers.
# id_labels: labels, one per element of id_list and in the same order.
#
# Gives "Other" when the id is in none of the data frames, "Multiple" when it
# is in more than one, and otherwise the label of the one that contains it.
get_party <- function(user_id,
                      id_list = followers_by_party,
                      id_labels = party_names) {
  # One TRUE/FALSE per data frame: does it contain this id?
  is_member <- vapply(
    id_list,
    function(followers) user_id %in% followers$user_id,
    logical(1)
  )
  n_matches <- sum(is_member)
  if (n_matches > 1) {
    "Multiple"
  } else if (n_matches == 0) {
    "Other"
  } else {
    id_labels[is_member]
  }
}

# Follower data frames and their labels, in matching order: get_party() pairs
# the i-th data frame with the i-th label by position.
followers_by_party <- list(
  spd_followers, afd_followers, cdu_followers, linke_followers
)
party_names <- c("SPD", "AfD", "CDU", "Linke")
# Guard against the two getting out of step when a party is added.
stopifnot(length(followers_by_party) == length(party_names))

# Classify every user id. USE.NAMES = FALSE stops vapply() from naming the
# result after the ids when user_id is a character vector, which would
# otherwise leave a names attribute on the new column.
users_full$party_followers <- vapply(
  users_full$user_id, get_party, character(1),
  USE.NAMES = FALSE
)

答案 2 :(得分:0)

不使用ifelse,只用基础R的向量化比较。Multiple放在最后处理,因此它会覆盖之前已写入的单一政党名称。

# Answer 2: classify users with vectorised membership tests (base R only).

# One logical flag per row of users_full for each party.
spd <- users_full$user_id %in% spd_followers$user_id
afd <- users_full$user_id %in% afd_followers$user_id
cdu <- users_full$user_id %in% cdu_followers$user_id
linke <- users_full$user_id %in% linke_followers$user_id

# Number of parties each user follows (logicals add up as 0/1), computed once.
n_parties <- afd + cdu + spd + linke
multiple <- n_parties > 1
other <- n_parties == 0

# Start from character NAs so the result is a character vector even when
# users_full has no rows.
party_followers <- rep(NA_character_, length(spd))
party_followers[spd] <- "SPD"
# "AfD" matches the spelling used in the question's code and desired output.
party_followers[afd] <- "AfD"
party_followers[cdu] <- "CDU"
party_followers[linke] <- "Linke"
# Assigned last on purpose: "Multiple" overrides the single-party label
# written above for users who follow more than one party.
party_followers[multiple] <- "Multiple"
party_followers[other] <- "Other"

users_full$party_followers <- party_followers