我有一个数据框users_full,我想在其中创建一个新变量party_followers,它包含以下类别:AFD,SPD,CDU,FDP,Linke,Multiple或Other。
我想通过将users_full$user_id与各政党关注者数据框(例如afd_followers,spd_followers等)的user_id变量相匹配来创建新变量的类别。
问题是users_full$user_id中的同一个用户可能同时关注多个政党,我不知道如何用ifelse语句处理这一点。
我尝试了以下内容,但它无法正常工作。
# NOTE: this is the asker's non-working attempt, kept verbatim.
# Why it cannot produce the intended result:
#   * `user_id == other$user_id` compares element by element (with
#     recycling); it is not a set-membership test like `%in%`.
#   * `ifelse()` accepts only `test`, `yes` and `no`; the extra
#     condition/label pairs passed here are not valid arguments.
#   * In the last condition `&` is applied directly to `user_id` columns
#     rather than to logical comparisons.
#   * `fdp_followers` is not among the sample data frames shown with the
#     question.
mutate(users_full, party_followers = ifelse(user_id == afd_followers$user_id & user_id != cdu_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "AfD",
user_id == cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "CDU",
user_id == spd_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != linke_followers$user_id & user_id != fdp_followers$user_id, "SPD",
user_id == linke_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != fdp_followers$user_id, "Linke",
user_id == fdp_followers$user_id & user_id != cdu_followers$user_id & user_id != afd_followers$user_id & user_id != spd_followers$user_id & user_id != linke_followers$user_id, "FDP",
user_id == afd_followers$user_id & cdu_followers$user_id & spd_followers$user_id & linke_followers$user_id & fdp_followers$user_id, "Multiple", "Other"))
下面,我重建了我正在使用的数据框的样本。
# Sample data: every data frame has a single `user_id` column holding
# account ids stored as strings.

# Users to be classified.
users_full <- data.frame(user_id = c(
  "3854371132",
  "883470465498587138",
  "145216962",
  "2223089418",
  "2861583057",
  "271413649"
))

# Followers of each party account.
spd_followers <- data.frame(user_id = c(
  "145216962",
  "3864655101",
  "757305123165069312",
  "4854498122",
  "1201495387",
  "565422099"
))

afd_followers <- data.frame(user_id = c(
  "3854371132",
  "883470465498587138",
  "845969869778685952",
  "3864655101",
  "757305123165069312",
  "793677341042044928"
))

cdu_followers <- data.frame(user_id = c(
  "3854371132",
  "145216962",
  "3864655101",
  "757305123165069312",
  "3207639056",
  "4854498122"
))

linke_followers <- data.frame(user_id = c(
  "47289872",
  "1044855103",
  "565082298",
  "956148596042330112",
  "2490464967",
  "956147739951329280"
))
我想得到以下输出:
user_id party_followers
883470465498587000 AfD
3854371132 Multiple
1044855103 Linke
757305123165069000 Multiple
3207639056 SPD
947682953 Other
答案 0 :(得分:2)
为了在没有复制/粘贴的情况下进行扩展,以防您有其他follower数据框,我会将所有关注者放在list中,将它们折叠为单个数据框,并使用merge。
使用data.table:
# It is not clear which merge behaviour is wanted; `all = TRUE` in the final
# call keeps every user that appears in any of the data frames.
library(data.table)

# Named list: the element names become the party label via `idcol`.
followers <- list(
  spd = spd_followers,
  afd = afd_followers,
  cdu = cdu_followers,
  linke = linke_followers
)

# Stack all follower tables into one long table (party_followers, user_id).
foll <- rbindlist(followers, idcol = "party_followers")

# Convert ids to character *before* keying: rewriting a key column with `:=`
# drops the key, so keying first would be wasted work.
foll[, user_id := as.character(user_id)]

# Drop repeated (party, user) rows so that `n` counts distinct parties and a
# user listed twice for one party is not mislabelled as "multiple".
foll <- unique(foll, by = c("party_followers", "user_id"))
setkey(foll, "user_id")

# `setDT()` converts `users_full` to a data.table by reference.
setDT(users_full)
users_full[, user_id := as.character(user_id)]
setkey(users_full, "user_id")

# n = number of parties each user follows.
foll[, n := .N, by = user_id]
foll[n > 1, party_followers := "multiple"]

# Rows of multi-party users are now identical; keep one row per user.
foll <- unique(foll, by = c("party_followers", "user_id"))

# Full outer join; users that follow no listed party get NA.
merge(users_full, foll, all = TRUE)
# user_id party_followers n
# 1: 1044855103 linke 1
# 2: 1201495387 spd 1
# 3: 145216962 multiple 2
# 4: 2223089418 NA NA
# 5: 2490464967 linke 1
# 6: 271413649 NA NA
# 7: 2861583057 NA NA
# 8: 3207639056 cdu 1
...
我不确定您想要哪种merge行为。使用all = TRUE会包含出现在任何数据框中的所有用户。使用all.x = TRUE则仅保留users_full内的用户;使用all = FALSE进行内部联接,则仅保留既位于users_full中,又至少出现在其中一个follower数据框中的用户。我保留了n列,它显示了每个用户所在的网络数量 - 如果需要,可以将其删除。
答案 1 :(得分:1)
这是一种仅使用基础R的函数式方法,只需向关注者列表和政党名称中添加条目即可轻松扩展。过去我用的是sapply(),但在函数中建议使用vapply(),因为其输出格式更可预测。
# Classify a single user ID by the party follower lists it appears in.
#
# user_id   : one user ID.
# id_list   : list of data frames, each with a `user_id` column.
# id_labels : labels, in the same order as `id_list`.
#
# Returns "Other" when the ID is in none of the lists, "Multiple" when it is
# in more than one, and otherwise the label of the single matching list.
get_party <- function(user_id,
                      id_list = followers_by_party,
                      id_labels = party_names) {
  is_member <- vapply(
    id_list,
    function(followers) user_id %in% followers$user_id,
    logical(1)
  )
  n_parties <- sum(is_member)

  if (n_parties > 1) {
    "Multiple"
  } else if (n_parties == 0) {
    "Other"
  } else {
    id_labels[is_member]
  }
}
# Party labels and the matching follower data frames; both must stay in the
# same order, because get_party() pairs them by position.
party_names <- c("SPD", "AfD", "CDU", "Linke")
followers_by_party <- list(
  spd_followers,
  afd_followers,
  cdu_followers,
  linke_followers
)

# Classify every user ID, one at a time.
users_full$party_followers <- vapply(
  X = users_full$user_id,
  FUN = get_party,
  FUN.VALUE = character(1)
)
答案 2 :(得分:0)
不使用ifelse,只用基础R进行向量比较。最后才检查Multiple,因此它会覆盖之前已写入的政党名称。
# One logical mask per party: TRUE where the user follows that party.
ids <- users_full$user_id
in_spd <- ids %in% spd_followers$user_id
in_afd <- ids %in% afd_followers$user_id
in_cdu <- ids %in% cdu_followers$user_id
in_linke <- ids %in% linke_followers$user_id

# Number of parties each user follows (TRUE counts as 1).
n_parties <- in_afd + in_cdu + in_spd + in_linke

# Fill in single-party labels first ...
party_followers <- rep(NA, length(in_spd))
party_followers[in_spd] <- "SPD"
party_followers[in_afd] <- "AFD"
party_followers[in_cdu] <- "CDU"
party_followers[in_linke] <- "Linke"

# ... then overwrite: "Multiple" replaces whichever party label was written
# last for users in several parties, and "Other" fills users in none.
party_followers[n_parties > 1] <- "Multiple"
party_followers[n_parties == 0] <- "Other"

users_full$party_followers <- party_followers