我想把两个数据框 n 和 p 中的字符串粘贴(拼接)在一起(两者的 dput 输出附在问题末尾)。
它们的行数不同:nrow(n) = 25,nrow(p) = 20。
有两个分组变量:factor1(二元变量,在下面的 dput 数据中列名为 factor)和 factor2(整数)。
head(n,3) head(p,3)
string factor1 factor2 string factor1 factor2
-- -- -- -- -- --
h f1 5 i f1 1
h f1 6 c f1 2
h f1 7 c f1 3
tail(n,3) tail(p,3)
string factor1 factor2 string factor1 factor2
-- -- -- -- -- --
a f2 27 h f2 18
g f2 28 i f2 19
b f2 29 i f2 20
下面是我为创建所需数据框所做的尝试:
# The asker's failed attempts at combining the `string` columns of n and p.
# They are kept as written to illustrate the problem; none produces the
# expected output.

# Attempt 1: operate on the two data frames directly.
output <- paste (p - n) # error: n and p have different lengths
# Attempt 2: outer merge. With no `by` given, merge() joins on every shared
# column name, `string` included.
output <- merge (p,n, all=T) # merge into one df
# Attempt 3: paste over the merged data frame.
output <- tapply(output, 1, paste) # same error
# Attempt 4: note that each comparison tests a column against itself, so the
# condition says nothing about how rows of n relate to rows of p.
output <- tapply(output[which((output$factor == output$factor & output$factor2 == output$factor2 ))], 1, paste) # nonsensical
抱歉缺少“最小代码”......
预期输出:
head(output) tail(output)
string factor factor2 string factor factor2
-- -- -- -- -- --
i f1 1 g f2 24
c f1 2 e f1 25
c f1 3 j f1 26
g f1 4 a f2 27
fh f1 5 g f2 28
ih f1 6 b f2 29
> dput(n)
structure(list(string = structure(c(7L, 7L, 7L, 4L, 5L, 2L, 2L,
1L, 4L, 1L, 1L, 2L, 3L, 1L, 4L, 1L, 8L, 8L, 2L, 6L, 5L, 8L, 1L,
6L, 2L), .Label = c("a", "b", "c", "d", "e", "g", "h", "j"), class = "factor"),
factor = c("f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1",
"f1", "f1", "f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2",
"f2", "f2", "f1", "f1", "f2", "f2", "f2"), factor2 = 5:29), .Names = c("string",
"factor", "factor2"), row.names = c(NA, -25L), class = "data.frame")
> dput(p)
structure(list(string = structure(c(5L, 1L, 1L, 3L, 2L, 5L, 5L,
6L, 4L, 6L, 6L, 5L, 4L, 6L, 6L, 6L, 6L, 4L, 5L, 5L), .Label = c("c",
"f", "g", "h", "i", "j"), class = "factor"), factor = c("f1",
"f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f1", "f2", "f2",
"f2", "f2", "f2", "f2", "f2", "f2", "f2", "f2"), factor2 = 1:20), .Names = c("string",
"factor", "factor2"), row.names = c(NA, -20L), class = "data.frame")
答案 0 :(得分:0)
使用 dplyr 和 purrr,我们可以先用 full_join 合并两个数据框,然后对每一行用 paste0 把两个字符串拼接成一个,并用 na.omit 去掉其中的 NA:
library(tidyverse)

# Full-join n and p on both key columns so that rows present in only one of
# the data frames are kept. The clashing `string` columns come back as
# string.x (from n) and string.y (from p).
full_join(n, p, by = c("factor", "factor2")) %>%
  mutate(
    # map2_chr() yields a plain character vector; map2() would leave `string`
    # as a list-column. p's string is placed first, and na.omit() drops the
    # side that had no match so a lone value is kept unchanged.
    string = map2_chr(
      as.character(string.x), as.character(string.y),
      ~ paste0(na.omit(c(.y, .x)), collapse = "")
    )
  ) %>%
  select(-string.x, -string.y)
   factor factor2 string
1      f1       5     fh
2      f1       6     ih
3      f1       7     ih
4      f1       8     jd
5      f1       9     he
6      f1      10     jb
7      f1      11      b
8      f1      12      a
9      f1      13      d
10     f1      14      a
11     f2      15     ja
12     f2      16     jb
13     f2      17     jc
14     f2      18     ha
15     f2      19     id
16     f2      20     ia
17     f2      21      j
18     f2      22      j
19     f2      23      b
20     f2      24      g
21     f1      25      e
22     f1      26      j
23     f2      27      a
24     f2      28      g
25     f2      29      b
26     f1       1      i
27     f1       2      c
28     f1       3      c
29     f1       4      g
30     f2      11      j
31     f2      12      i
32     f2      13      h
33     f2      14      j
在 base R 中:
# Outer-merge n and p on both key columns; all = TRUE keeps rows that appear
# in only one of the data frames. The clashing `string` columns are returned
# as string.x (from n) and string.y (from p).
np <- merge(n, p, by = c("factor", "factor2"), all = TRUE)

# Concatenate p's string followed by n's string for every row; na.omit()
# drops the missing side. USE.NAMES = FALSE stops mapply() from naming the
# result after the string.y values (which would include NA names).
np$string <- mapply(
  function(x, y) paste0(na.omit(c(x, y)), collapse = ""),
  as.character(np$string.y), as.character(np$string.x),
  USE.NAMES = FALSE
)

# Drop the two helper columns by name rather than by position, so the result
# does not depend on the column order merge() happens to produce.
np[, setdiff(names(np), c("string.x", "string.y"))]