R中的合并错误

时间:2018-06-20 14:58:43

标签: r

以下是两个示例数据框:

# Example lookup table `gp` (dput-style literal): a data.frame declared with
# 150 rows (row.names = c(NA, 150L)) and two factor columns:
#   gp.code    - factor whose .Label vector lists 146 levels
#                ("E82002" ... "Y02963")
#   cqc.rating - factor with 5 levels ("good", "inadequate", "not rated",
#                "oustanding", "requires improvement")
# NOTE(review): the integer codes 145L, 93L, 99L and 138L each appear twice in
# a row in gp.code, so gp.code is not unique per row; a merge keyed on gp.code
# will presumably return one output row per matching gp row for those codes.
# NOTE(review): the level "oustanding" looks like a misspelling of
# "outstanding"; it is data and is left untouched here.
gp <- structure(list(gp.code = structure(c(1L, 3L, 5L, 13L, 6L, 20L, 
                                           10L, 19L, 17L, 12L, 2L, 18L, 7L, 16L, 15L, 4L, 8L, 143L, 14L, 
                                           9L, 11L, 33L, 23L, 113L, 102L, 97L, 83L, 122L, 77L, 111L, 29L, 
                                           68L, 142L, 56L, 118L, 115L, 78L, 58L, 104L, 71L, 43L, 121L, 32L, 
                                           110L, 53L, 70L, 123L, 61L, 87L, 48L, 73L, 100L, 37L, 141L, 114L, 
                                           34L, 89L, 81L, 98L, 92L, 63L, 50L, 60L, 47L, 125L, 145L, 145L, 
                                           93L, 93L, 99L, 99L, 138L, 138L, 137L, 86L, 139L, 91L, 146L, 79L, 
                                           103L, 31L, 124L, 22L, 76L, 26L, 108L, 105L, 116L, 84L, 136L, 
                                           67L, 106L, 52L, 95L, 51L, 27L, 82L, 130L, 101L, 107L, 133L, 62L, 
                                           42L, 117L, 112L, 85L, 69L, 49L, 46L, 45L, 120L, 38L, 39L, 55L, 
                                           96L, 80L, 75L, 44L, 35L, 109L, 41L, 24L, 59L, 54L, 144L, 65L, 
                                           28L, 25L, 119L, 66L, 74L, 36L, 57L, 21L, 135L, 134L, 132L, 140L, 
                                           64L, 127L, 129L, 128L, 131L, 72L, 88L, 40L, 30L, 94L, 90L, 126L
), .Label = c("E82002", "E82014", "E82018", "E82019", "E82023", 
              "E82031", "E82037", "E82040", "E82041", "E82055", "E82058", "E82059", 
              "E82060", "E82062", "E82071", "E82077", "E82084", "E82095", "E82107", 
              "E82113", "M85001", "M85002", "M85005", "M85007", "M85008", "M85009", 
              "M85011", "M85013", "M85015", "M85019", "M85020", "M85021", "M85024", 
              "M85025", "M85030", "M85031", "M85037", "M85041", "M85042", "M85043", 
              "M85047", "M85048", "M85051", "M85052", "M85055", "M85056", "M85058", 
              "M85059", "M85062", "M85064", "M85065", "M85070", "M85074", "M85076", 
              "M85077", "M85078", "M85079", "M85084", "M85086", "M85088", "M85092", 
              "M85097", "M85098", "M85107", "M85111", "M85113", "M85115", "M85116", 
              "M85118", "M85127", "M85128", "M85134", "M85136", "M85141", "M85142", 
              "M85145", "M85146", "M85153", "M85154", "M85156", "M85167", "M85171", 
              "M85174", "M85176", "M85177", "M85178", "M85179", "M85600", "M85611", 
              "M85624", "M85634", "M85642", "M85652", "M85655", "M85669", "M85671", 
              "M85679", "M85684", "M85686", "M85693", "M85694", "M85699", "M85701", 
              "M85713", "M85715", "M85716", "M85717", "M85721", "M85730", "M85733", 
              "M85735", "M85736", "M85749", "M85753", "M85756", "M85757", "M85770", 
              "M85774", "M85776", "M85782", "M85783", "M85794", "M85797", "M85801", 
              "M88020", "M88021", "M89001", "M89002", "M89008", "M89009", "M89012", 
              "M89013", "M89021", "M89026", "M89027", "Y00412", "Y00471", "Y00492", 
              "Y01680", "Y02567", "Y02571", "Y02620", "Y02639", "Y02893", "Y02961", 
              "Y02963"), class = "factor"), cqc.rating = structure(c(1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 2L, 1L, 1L, 1L, 1L, 
                                                                     5L, 1L, 5L, 1L, 5L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 
                                                                     1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
                                                                     1L, 1L, 1L, 1L), .Label = c("good", "inadequate", "not rated", 
                                                                                                 "oustanding", "requires improvement"), class = "factor")), .Names = c("gp.code", 
                                                                                                                                                                       "cqc.rating"), row.names = c(NA, 150L), class = "data.frame")


# Example data `df` (dput-style literal): a data.frame declared with 150 rows
# (row.names = c(NA, 150L)) and two columns:
#   gp.code    - factor whose .Label vector lists 150 levels
#                ("E82002" ... "Y02963")
#   antibiotic - numeric (double) vector
# NOTE(review): the units/meaning of `antibiotic` are not stated in this
# snippet; confirm with the data owner before interpreting the values.
df <- structure(list(gp.code = structure(c(1L, 4L, 6L, 14L, 7L, 2L, 
                                     21L, 11L, 20L, 18L, 13L, 3L, 19L, 22L, 8L, 17L, 16L, 5L, 9L, 
                                     148L, 15L, 10L, 12L, 37L, 25L, 127L, 114L, 109L, 77L, 135L, 98L, 
                                     87L, 125L, 79L, 32L, 147L, 64L, 132L, 129L, 88L, 67L, 118L, 68L, 
                                     93L, 49L, 82L, 134L, 26L, 35L, 124L, 61L, 81L, 136L, 33L, 71L, 
                                     54L, 102L, 46L, 84L, 112L, 43L, 146L, 128L, 24L, 38L, 103L, 95L, 
                                     110L, 105L, 74L, 57L, 70L, 53L, 138L, 117L, 39L, 94L, 116L, 149L, 
                                     111L, 144L, 106L, 143L, 145L, 101L, 104L, 150L, 89L, 115L, 34L, 
                                     137L, 23L, 29L, 86L, 28L, 75L, 83L, 122L, 60L, 66L, 119L, 99L, 
                                     130L, 142L, 65L, 78L, 59L, 107L, 120L, 56L, 31L, 58L, 30L, 72L, 
                                     96L, 139L, 113L, 121L, 140L, 73L, 48L, 131L, 126L, 42L, 100L, 
                                     76L, 80L, 141L, 55L, 52L, 36L, 51L, 133L, 44L, 45L, 63L, 40L, 
                                     92L, 108L, 90L, 85L, 50L, 41L, 123L, 91L, 47L, 27L, 69L, 62L, 
                                     97L), .Label = c("E82002", "E82004", "E82014", "E82018", "E82019", 
                                                      "E82023", "E82031", "E82037", "E82040", "E82041", "E82055", "E82058", 
                                                      "E82059", "E82060", "E82062", "E82071", "E82077", "E82084", "E82095", 
                                                      "E82107", "E82113", "E82663", "M85002", "M85003", "M85005", "M85006", 
                                                      "M85007", "M85009", "M85010", "M85011", "M85014", "M85015", "M85018", 
                                                      "M85020", "M85021", "M85023", "M85024", "M85025", "M85028", "M85029", 
                                                      "M85030", "M85036", "M85037", "M85041", "M85042", "M85045", "M85047", 
                                                      "M85048", "M85051", "M85052", "M85055", "M85056", "M85058", "M85059", 
                                                      "M85062", "M85063", "M85064", "M85065", "M85070", "M85072", "M85074", 
                                                      "M85076", "M85077", "M85078", "M85081", "M85082", "M85084", "M85085", 
                                                      "M85086", "M85088", "M85092", "M85094", "M85097", "M85098", "M85100", 
                                                      "M85105", "M85108", "M85115", "M85116", "M85118", "M85127", "M85128", 
                                                      "M85133", "M85136", "M85142", "M85145", "M85146", "M85153", "M85154", 
                                                      "M85156", "M85159", "M85163", "M85164", "M85166", "M85167", "M85171", 
                                                      "M85172", "M85174", "M85176", "M85177", "M85178", "M85179", "M85611", 
                                                      "M85634", "M85642", "M85652", "M85669", "M85671", "M85679", "M85684", 
                                                      "M85686", "M85693", "M85694", "M85699", "M85701", "M85706", "M85711", 
                                                      "M85713", "M85715", "M85716", "M85717", "M85721", "M85730", "M85733", 
                                                      "M85735", "M85736", "M85749", "M85753", "M85756", "M85757", "M85770", 
                                                      "M85774", "M85782", "M85783", "M85794", "M85797", "M85801", "M88020", 
                                                      "M89009", "M89021", "Y00159", "Y00412", "Y00471", "Y00492", "Y01680", 
                                                      "Y02571", "Y02620", "Y02639", "Y02961", "Y02963"), class = "factor"), 
               antibiotic = c(1.23248149651249, 1.19804465710497, 0.753794802511325, 
                              0.85669917849255, 0.806766970145873, 1.2944351625755, 0.79749081458912, 
                              0.949915803767271, 1.28676136005656, 0.861894948337942, 0.98944777231592, 
                              0.77976175611218, 1.0802092104795, 1.18992427754597, 0.922230847446508, 
                              1.00968448247105, 1.00925275017575, 1.13856339619023, 1.29658868391219, 
                              3.43992412181159, 0.9405259515181, 1.04536664449872, 0.857195681526592, 
                              1.36040902899291, 1.1555007762595, 1.23099411388522, 1.2921619764172, 
                              1.20896911806371, 0.90601414991211, 1.48026866615811, 0.865283503864064, 
                              1.34285564503446, 0.919419926661631, 1.41915312988514, 1.2330635342805, 
                              3.66834851140276, 1.2803964023984, 0.777309332259057, 1.16760007845018, 
                              0.903108177347766, 1.07415817045842, 1.76503145582347, 0.662906258393768, 
                              1.11922205065869, 1.45743378132416, 1.40338387936522, 1.56356764856955, 
                              1.21554707497369, 0.765459254266153, 1.02985290952772, 0.747988215118069, 
                              1.28199535302764, 0.791630491986821, 1.45457105212014, 1.5360908424018, 
                              1.36219759497743, 1.2823181822961, 1.16445352400409, 0.867251210987798, 
                              0.93449947713661, 0.972235945064716, 0.952976072770419, 1.01713285255742, 
                              1.0094222885861, 0.875833539680039, 0.618892154842347, 0.472595751806604, 
                              0.496879988390655, 1.50731245234776, 1.04907441178441, 0.894164623526121, 
                              0.658261298693029, 0.726078998206472, 1.02776752877325, 1.19666179452119, 
                              0.97476267236602, 0.0127648710748021, 1.17439331625073, 5.8393330107237, 
                              1.59645232815965, 0.487542408650236, 1.14865894544346, 0.729495610858418, 
                              0.475652186678803, 0.810665743225695, 1.55727483921682, 0.509032628956674, 
                              1.08248967413256, 0.829656197645062, 0.883813971368163, 1.1606344950849, 
                              0.643888106444113, 0.658542420310134, 0.788100265873058, 
                              0.999993653251755, 0.549776366766276, 1.00900222339709, 0.759174545084884, 
                              0.732601429257463, 0.811032584239922, 0.992078825347759, 
                              0.916336303170667, 0.924425842068231, 0.833487920775124, 
                              1.2048401786876, 1.0710312446967, 1.15996384388112, 0.802575397465166, 
                              0.827940641127218, 0.988964351312201, 0.810501627167164, 
                              0.972188732451928, 1.21663117141513, 0.648182525899754, 1.24597821683072, 
                              1.25013278566623, 1.16685772173495, 0.878810966942241, 1.21188990166584, 
                              1.05209718360933, 0.928089616209815, 1.51726626492982, 0.955522092040987, 
                              1.14598540145985, 0.992072220256482, 1.17856657930143, 0.487420516416757, 
                              1.12018266962542, 0.999491890919433, 1.10449907263643, 1.38308178076077, 
                              1.0848078324396, 0.735665641476272, 0.815600508556523, 1.04175344119065, 
                              1.63317262657807, 0.941009543029732, 0.945643608300648, 0.785026349264038, 
                              1.11186113789778, 0.931541465655869, 0.950426305389678, 1.12222589692599, 
                              1.75509240895922, 1.39836663546273, 1.11387374264761, 1.42177823010633, 
                              0.957155370021804, 1.48242155040868, 1.1388984391116)), .Names = c("gp.code", 
                                                                                                 "antibiotic"), row.names = c(NA, 150L), class = "data.frame")

我希望将gp中的数据合并到df。这是我的数据示例,完整版本大约有8000条记录。

我通常使用以下代码: `new <- merge(df, gp, by = c("gp.code"), all.x = T)`

但是运行后可以看到,得到的“new”数据框中有154条记录。据我了解,all.x = TRUE 表示保留df数据框中的所有记录——为什么结果会多出几行数据?如果我改用all.y = TRUE,则得到150条记录。在完整的数据集上运行时,无论使用all.x还是all.y = T,我都无法得到行数与df相同、只是多出合并列的结果。

我哪里做错了?有没有其他更合适的函数?

0 个答案:

没有答案