我有一个很大的数据框,想把从多个数据框(约 17 个)计算出的得分合并到这个数据框中,并且需要在 12 个不同的时间点重复这一过程。下面是一个示例数据框:
# Example data frame: one row per person, holding an identifier and an age.
# The key column is named `id`; the previous name `ï..id` looks like a
# byte-order-mark artifact and made lookups such as df[["id"]] return NULL.
df <- structure(
  list(
    id = structure(
      c(2L, 7L, 5L, 4L, 3L, 1L, 6L, 8L),
      .Label = c("B12", "B7", "C2", "C9", "D3", "E2", "E6", "R4"),
      class = "factor"
    ),
    age = c(42L, 45L, 83L, 59L, 49L, 46L, 52L, 23L)
  ),
  class = "data.frame",
  row.names = c(NA, -8L)
)
我需要使用 igraph 包来计算网络指标。下面是两个矩阵,分别记录了不同的人之间的配对关系:
# Edge lists for two separate networks. Each row is one target-partner pair;
# the values are filled column-wise, so net_mat1 holds B7-D3 and E6-C9, and
# net_mat2 holds C2-E2 and B12-R4.
# The first column is named `target` (the previous `ï..target` looks like a
# byte-order-mark artifact).
net_mat1 <- structure(
  c("B7", "E6", "D3", "C9"),
  .Dim = c(2L, 2L),
  .Dimnames = list(NULL, c("target", "partner"))
)
net_mat2 <- structure(
  c("C2", "B12", "E2", "R4"),
  .Dim = c(2L, 2L),
  .Dimnames = list(NULL, c("target", "partner"))
)
这是我正在计算的
library(igraph)
# Build one graph per edge list (graph_from_edgelist() creates a directed
# graph unless directed = FALSE is passed).
g1 <- graph_from_edgelist(net_mat1)
g2 <- graph_from_edgelist(net_mat2)

# Degree centralization of each graph; the `res` element of the result holds
# the per-vertex degree scores used further below.
degree_cent_close_1 <- centr_degree(g1, mode = "all")
# Print the object that was just created. The earlier spelling
# `degree.cent_close_1` referred to an object that does not exist.
degree_cent_close_1
degree.cent_close2 <- centr_degree(g2, mode = "all")
degree.cent_close2 # print the metrics for the second network
然后我创建包含我计算的指标的数据框
# One score table per network: the per-vertex degree next to the vertex name.
# The column names are written out explicitly; they are the names that
# data.frame() would otherwise derive from the argument expressions.
cent_score_df1 <- data.frame(
  degree_cent_close_1.res = degree_cent_close_1$res,
  V.g1..name = V(g1)$name
)
cent_score_df1
cent_score_df2 <- data.frame(
  degree.cent_close2.res = degree.cent_close2$res,
  V.g2..name = V(g2)$name
)
cent_score_df2
然后,我尝试按 id 匹配这些指标的值,并把它们写回数据框 df 中:
# For every id in df, find its row in the first score table (NA when the id
# is not part of that network) and copy the matching degree score.
rows_in_net1 <- match(df[["id"]], cent_score_df1[["V.g1..name"]])
df$centrality_scores <- cent_score_df1[rows_in_net1, "degree_cent_close_1.res"]
df$centrality_scores
# Same lookup against the second score table. This assignment replaces the
# entire column, so the values written from the first network are lost.
rows_in_net2 <- match(df[["id"]], cent_score_df2[["V.g2..name"]])
df$centrality_scores <- cent_score_df2[rows_in_net2, "degree.cent_close2.res"]
df$centrality_scores
但是,每次我尝试把这些数据合并回原始数据框时,似乎都只能附加一半的数据,无法同时把两个数据框的结果都附加进去。有没有更好的方法来合并这些数据?如果有更快、更简洁的方法,我将不胜感激。
答案 0 :(得分:1)
这行代码的问题是,您没有选择原始 data.frame 中要更新的行,而是对整列重新赋值:在该网络中匹配不到的 id 会得到 NA,而对第二个网络再做一次同样的赋值时,又会把第一次写入的结果整列覆盖。
# Assigns the whole column at once: ids with no match in cent_score_df1 get NA.
df$centrality_scores <- cent_score_df1[ match(df[['id']], cent_score_df1[['V.g1..name']] ) , 'degree_cent_close_1.res']
您打算这样做:
# Start from an all-NA numeric column, then fill in only the rows whose id
# appears in a given score table, so one network never overwrites another.
df$centrality_scores <- NA_real_

# match(<vertex names>, df$id) gives the row of df for each scored vertex.
# Vertices that do not occur in df yield NA and are skipped.
rows_net1 <- match(cent_score_df1[["V.g1..name"]], df[["id"]])
found_net1 <- !is.na(rows_net1)
df$centrality_scores[rows_net1[found_net1]] <-
  cent_score_df1[["degree_cent_close_1.res"]][found_net1]

rows_net2 <- match(cent_score_df2[["V.g2..name"]], df[["id"]])
found_net2 <- !is.na(rows_net2)
df$centrality_scores[rows_net2[found_net2]] <-
  cent_score_df2[["degree.cent_close2.res"]][found_net2]
解决此问题的另一种方法是:先把各个指标数据框的列名统一,再使用 merge 函数把结果合并回原始数据框。
# Give every score table the same column names so they can be stacked.
names(cent_score_df1) <- c("centrality_scores", "id")
names(cent_score_df2) <- c("centrality_scores", "id")
# do.call(rbind, <list>) works for any number of score tables; add further
# tables to the list as needed.
cent_score <- do.call(rbind, list(cent_score_df1, cent_score_df2))

# Drop a score column left over from an earlier attempt; otherwise merge()
# would return centrality_scores.x and centrality_scores.y.
df$centrality_scores <- NULL
# all.x = TRUE keeps rows of df that have no score (they get NA) instead of
# silently dropping them. Note that merge() sorts the result by `id`.
df <- merge(df, cent_score, by = "id", all.x = TRUE)
df