示例数据:
# Example data frame `full`: six rows, all at Location "AKS", CT_NT "CT" and
# Site 1, with the depth sequence 5/10/15 repeated twice. Location carries the
# full set of 12 location levels even though only "AKS" occurs in this sample.
full <- data.frame(
  Location = factor(
    rep("AKS", 6L),
    levels = c(
      "AKS", "AOK", "BTX", "GTX", "HKS", "JKS",
      "LOK", "MKS", "MOK", "PKS", "SKS", "VTX"
    )
  ),
  CT_NT = factor(rep("CT", 6L), levels = c("CT", "NT")),
  Depth = rep(c(5L, 10L, 15L), times = 2L),
  Site = rep(1L, 6L),
  PW = c(22.8, 21.5, 18.2, 22.5, 20.5, 19.2),
  BD = c(1.1, 1.2, 1.1, 1.3, 1.3, 1.5)
)
# Example data frame `osu`: six rows at Location "AKS", three for CT_NT "CT"
# followed by three for "NT", each at depths 5/10/15, with one value per row
# for pH, N, P, K, OM, NH4, Sand, Silt and Clay. Location carries the full
# set of 12 location levels even though only "AKS" occurs in this sample.
osu <- data.frame(
  Location = factor(
    rep("AKS", 6L),
    levels = c(
      "AKS", "AOK", "BTX", "GTX", "HKS", "JKS",
      "LOK", "MKS", "MOK", "PKS", "SKS", "VTX"
    )
  ),
  CT_NT = factor(rep(c("CT", "NT"), each = 3L), levels = c("CT", "NT")),
  Depth = rep(c(5L, 10L, 15L), times = 2L),
  pH = c(5.1, 5.4, 5.9, 5.2, 5.9, 6.2),
  N = c(50, 31, 22, 35, 17, 8),
  P = c(122, 55, 34, 107, 23, 17),
  K = c(1301, 1202, 1078, 1196, 1028, 948),
  OM = c(2.3, 1.8, 1.5, 2.1, 1.4, 1.2),
  NH4 = c(19.3, 14.5, 11.6, 12.3, 8.6, 8.4),
  Sand = c(22.5, 25, 25, 25, 22.5, 18.8),
  Silt = c(56.3, 52.5, 50, 51.3, 52.5, 51.3),
  Clay = c(21.3, 22.5, 25, 23.8, 25, 30)
)
我正在尝试使用 dplyr 中的 left_join 连接两个数据集。令我惊讶的是,结果中出现了重复的行,而这些行不知为何没有被识别为重复。在阅读了这里能找到的所有关于“join”问题的其他回答之后(至少我不是唯一遇到这类问题的人……?),我尝试过:
我之前使用过 left_join 而没有遇到过这个问题,而且当时的数据集非常相似(事实上就是这项完整研究的试点数据)。我以为我明白 left_join 在做什么,但现在我怀疑自己是否真的明白。我正努力提高使用 dplyr 的水平,但遗憾的是,我很多时候只是在胡乱尝试,直到某种做法奏效;我希望能弄明白它为什么有效,以便以后需要时能够重现。
鉴于我经验不足,我确信答案对所有人来说都会简单直接得令人沮丧。我想这就是学习编程的必经之路。提前感谢您解答新手的问题!
这是我的代码:
# Summarise `full` into f1, tidy `osu` into f2, then left-join f2 onto f1.
# Expects dplyr to be attached and the data frames `full` and `osu` to exist.

# One shared set of Depth levels for both tables. Converting each table with
# as.factor() separately can give different level sets, in which case the
# join coerces the key to character and emits a warning.
depth_levels <- sort(unique(c(full$Depth, osu$Depth)))

f1 <- full %>%
  group_by(Location, CT_NT, Depth, Site) %>%
  # Select the measured columns by name; positional indices are fragile
  # because grouping columns may or may not be counted.
  summarise_at(vars(PW, BD), mean) %>%
  # ungroup() takes no data argument inside a pipe: the piped table is the
  # input, and any extra argument is read as a grouping column to remove.
  ungroup()
f1$Depth <- factor(f1$Depth, levels = depth_levels)
# Site was only needed for grouping. Once it is dropped, f1 may hold several
# rows per Location/CT_NT/Depth (one per site).
f1$Site <- NULL

osu$Texture_Class <- NULL # take out the texture class column, if present
# arrange() ignores grouping by default, so no group_by()/ungroup() pair is
# needed just to sort.
f2 <- osu %>%
  arrange(Location, CT_NT, Depth)
f2$Depth <- factor(f2$Depth, levels = depth_levels)

# A left join returns one output row per matching pair, so duplicate keys in
# f2 multiply the rows of f1. Surface that instead of letting it pass silently.
if (anyDuplicated(f2[c("Location", "CT_NT", "Depth")]) > 0L) {
  warning(
    "f2 has repeated Location/CT_NT/Depth combinations; ",
    "left_join() will return one row per match.",
    call. = FALSE
  )
}

# Name the keys explicitly rather than relying on whichever columns the two
# tables happen to share.
full_summary <- left_join(f1, f2, by = c("Location", "CT_NT", "Depth"))