我有两张数据表(data frame)
我需要合并它们。大多数列都是相同的,因此实际上只有名为“FLatSchool”的列中的信息会从 data_id_1 添加到 data_id:
# Left join: keep every row of data_id and attach the extra columns of
# data_id_1 (here FLatSchool). A row of data_id only receives values when
# all eight key columns match a row of data_id_1; otherwise the added
# columns are NA.
all_data <- merge(
  data_id,
  data_id_1,
  by = c(
    "ID_w2", "ID_w3", "ID_w4", "ID_w5",
    "ID_MC_w3", "ID_MC_w5",
    "School", "Class"
  ),
  all.x = TRUE
)
在合并之前,我检查了 data_id_1,确认“FLatSchool”列中存在不同的(数字)值。但是,两个表合并之后,结果表中的该列只包含 NA(其他列都没有问题,只有这一列有问题)。可能是什么原因?
数据:
> dput(data_id)
structure(list(School = c(3L, 3L, 3L), Class = c(10L, 10L, 10L
), ID_w2 = structure(1:3, .Label = c("RU8_800", "RU8_801", "RU8_802"
), class = "factor"), ID_all = 71163901:71163903, ID_w3 = 427748:427750,
ID_MC_w3 = structure(1:3, .Label = c("stp94660", "stp94661",
"stp94662"), class = "factor"), ID_w4 = 428617:428619, ID_w5 =
428725:428727,
ID_MC_w5 = structure(1:3, .Label = c("STP114890", "STP114891",
"STP114892"), class = "factor")), .Names = c("School", "Class",
"ID_w2", "ID_all", "ID_w3", "ID_MC_w3", "ID_w4", "ID_w5", "ID_MC_w5"
), row.names = c(NA, 3L), class = "data.frame")
> dput(data_id_1)
structure(list(ID_w2 = structure(c(NA, 2L, 1L), .Label = c("RU8_235",
"RU8_239"), class = "factor"), ID_w3 = 427521:427523, ID_MC_w3 =
structure(1:3, .Label = c("stp94433",
"stp94434", "stp94435"), class = "factor"), ID_w4 = 428390:428392,
ID_w5 = 428781:428783, ID_MC_w5 = structure(1:3, .Label = c("stp114946",
"stp114947", "stp114948"), class = "factor"), School = c(1L,
1L, 1L), Class = c(5L, 5L, 5L), FLatSchool = c(1L, 1L, 1L
)), .Names = c("ID_w2", "ID_w3", "ID_MC_w3", "ID_w4", "ID_w5",
"ID_MC_w5", "School", "Class", "FLatSchool"), row.names = c(NA,
3L), class = "data.frame")
使用上面的脚本后,我得到的是
> dput(all_data)
structure(list(ID_w2 = structure(1:3, .Label = c("RU8_800", "RU8_801",
"RU8_802"), class = "factor"), ID_w3 = 427748:427750, ID_w4 =
428617:428619,
ID_w5 = 428725:428727, ID_MC_w3 = structure(1:3, .Label = c("stp94660",
"stp94661", "stp94662"), class = "factor"), ID_MC_w5 = structure(1:3,
.Label = c("STP114890",
"STP114891", "STP114892"), class = "factor"), School = c(3L,
3L, 3L), Class = c(10L, 10L, 10L), ID_all = 71163901:71163903,
FLatSchool = c(NA_integer_, NA_integer_, NA_integer_)), .Names =
c("ID_w2",
"ID_w3", "ID_w4", "ID_w5", "ID_MC_w3", "ID_MC_w5", "School",
"Class", "ID_all", "FLatSchool"), row.names = c(NA, -3L), class =
"data.frame")
我期望的是
> dput(all_data)
structure(list(ID_w2 = structure(1:3, .Label = c("RU8_800", "RU8_801",
"RU8_802"), class = "factor"), ID_w3 = 427748:427750, ID_w4 =
428617:428619,
ID_w5 = 428725:428727, ID_MC_w3 = structure(1:3, .Label = c("stp94660",
"stp94661", "stp94662"), class = "factor"), ID_MC_w5 = structure(1:3,
.Label = c("STP114890",
"STP114891", "STP114892"), class = "factor"), School = c(3L,
3L, 3L), Class = c(10L, 10L, 10L), ID_all = 71163901:71163903,
FLatSchool = c(1, 1, 1)), .Names = c("ID_w2",
"ID_w3", "ID_w4", "ID_w5", "ID_MC_w3", "ID_MC_w5", "School",
"Class", "ID_all", "FLatSchool"), row.names = c(NA, -3L), class =
"data.frame")
答案 0 :(得分:0)
您要添加观察结果吗?我也建议使用library(dplyr)。
left_join(x, y, by = c("a" = "b"), copy = FALSE, suffix = c(".x", ".y"), ...)
确保包含 FLatSchool 列的表位于“左侧”(即作为 x),以保留这些值。
在by语句中,您可以有多个变量。
后缀 = 如果x和y中存在未联接的重复变量,则将这些后缀添加到输出中以消除歧义。应该是长度为2的字符向量。
答案 1 :(得分:0)
感谢大家的回应!问题解决了,我只需要在 merge 调用中加上“all = TRUE”即可:
# Full outer join: all = TRUE sets both all.x and all.y, so rows from either
# table that have no match on the key columns are kept, with NA in the
# columns they lack. A separate all.x argument is redundant because all.x
# already defaults to the value of all.
all_data <- merge(
  data_id,
  data_id_1,
  by = c(
    "ID_w2", "ID_w3", "ID_w4", "ID_w5",
    "ID_MC_w3", "ID_MC_w5",
    "School", "Class"
  ),
  all = TRUE
)