Question

我想把两个数据框 n 和 p 中的字符串粘贴在一起（两者的 dput 输出见文末）。它们的大小不同：nrow(n) = 25，nrow(p) = 20。两者都有两个因子列：factor1（二元）和 factor2（整数）。

head(n,3)                       head(p,3)
string   factor1  factor2        string    factor1  factor2
--       --       --             --        --       --
h        f1       5              i         f1       1
h        f1       6              c         f1       2
h        f1       7              c         f1       3

tail(n,3)                       tail(p,3)
string   factor1  factor2        string    factor1  factor2
--       --       --             --        --       --
a        f2       27             h         f2       18
g        f2       28             i         f2       19
b        f2       29             i         f2       20

在这里，我想创建一个数据框

- 不遗漏任何因子组合
- 当两个数据框中的因子组合相同时，粘贴 n 和 p 的字符串
- 如果某个因子组合只在其中一个数据框中出现，则只保留那一个值

# Attempts from the question, kept unchanged; per the asker none of them gives the desired result.
output <- paste (p - n) # error: n and p have different lengths
output <- merge (p,n, all=T) # merge into one df
output <- tapply(output, 1, paste) # same error
output <- tapply(output[which((output$factor == output$factor & output$factor2 == output$factor2 ))], 1, paste) # nonsensical

抱歉缺少“最小代码”......

----

预期输出：

head(output)                   tail(output)
string   factor   factor2        string    factor   factor2
--       --       --             --        --       --
i        f1       1              g         f2       24
c        f1       2              e         f1       25
c        f1       3              j         f1       26
g        f1       4              a         f2       27
fh       f1       5              g         f2       28  
ih       f1       6              b         f2       29

-----

> dput(n)
    structure(list(string = structure(c(7L, 7L, 7L, 4L, 5L, 2L, 2L, 
1L, 4L, 1L, 1L, 2L, 3L, 1L, 4L, 1L, 8L, 8L, 2L, 6L, 5L, 8L, 1L, 
6L, 2L), .Label = c("a", "b", "c", "d", "e", "g", "h", "j"), class = "factor"), 
    factor = c("f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", 
    "f1", "f1", "f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2", 
    "f2", "f2", "f1", "f1", "f2", "f2", "f2"), factor2 = 5:29), .Names = c("string", 
"factor", "factor2"), row.names = c(NA, -25L), class = "data.frame")

> dput(p)
     structure(list(string = structure(c(5L, 1L, 1L, 3L, 2L, 5L, 5L, 
6L, 4L, 6L, 6L, 5L, 4L, 6L, 6L, 6L, 6L, 4L, 5L, 5L), .Label = c("c", 
"f", "g", "h", "i", "j"), class = "factor"), factor = c("f1", 
"f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f2", "f2", 
"f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2"), factor2 = 1:20), .Names = c("string", 
"factor", "factor2"), row.names = c(NA, -20L), class = "data.frame")

Answer 1

使用 dplyr 和 purrr，可以先用 full_join 合并两个数据框，然后把两个字符串组成的向量去掉 NA 后再用 paste0 拼接：

library(tidyverse)

# Full join on both key columns so that every (factor, factor2) pair present
# in either data frame is kept; `string.x` comes from n, `string.y` from p.
full_join(n, p, by = c("factor", "factor2")) %>%
  # map2_chr() yields a plain character column (map2() would create a list
  # column). p's string is placed first, then n's; the NA coming from an
  # unmatched side is removed by na.omit() before pasting.
  mutate(string = map2_chr(as.character(string.x), as.character(string.y),
                           ~ paste0(na.omit(c(.y, .x)), collapse = ""))) %>%
  select(-string.x, -string.y)

   factor factor2 string
1      f1       5     fh
2      f1       6     ih
3      f1       7     ih
4      f1       8     jd
5      f1       9     he
6      f1      10     jb
7      f1      11      b
8      f1      12      a
9      f1      13      d
10     f1      14      a
11     f2      15     ja
12     f2      16     jb
13     f2      17     jc
14     f2      18     ha
15     f2      19     id
16     f2      20     ia
17     f2      21      j
18     f2      22      j
19     f2      23      b
20     f2      24      g
21     f1      25      e
22     f1      26      j
23     f2      27      a
24     f2      28      g
25     f2      29      b
26     f1       1      i
27     f1       2      c
28     f1       3      c
29     f1       4      g
30     f2      11      j
31     f2      12      i
32     f2      13      h
33     f2      14      j

在 base R 中：

# Outer merge keeps every (factor, factor2) pair found in either data frame;
# `string.x` comes from n and `string.y` from p.
np <- merge(n, p, by = c("factor", "factor2"), all = TRUE)
# Concatenate p's string followed by n's, dropping the NA of an unmatched side.
# USE.NAMES = FALSE keeps the new column free of element names, which mapply()
# would otherwise take from its first (character) argument.
np$string <- mapply(function(x, y) paste0(na.omit(c(x, y)), collapse = ""),
                    as.character(np$string.y), as.character(np$string.x),
                    USE.NAMES = FALSE)
# Drop the helper columns by name rather than by position.
np[, !(names(np) %in% c("string.x", "string.y")), drop = FALSE]

粘贴两个不同大小的数据帧

----

-----

1 个答案: