我有两个数据集(csv 文件,分别有 1000 列和 200 列),我想根据两个数据集中两列元素是否完全匹配,把第二个数据集的某些列动态添加到第一个数据集中。(如有困惑,请参见下面给出的预期结果和实际结果)
我尝试用两个嵌套的 for 循环逐个元素地进行比较。一旦列元素匹配(满足匹配条件),就把该元素存入一个向量中。对内部 for 循环中的所有元素重复此过程 -> 内部 for 循环结束后,转置该向量,把它转换为列向量 -> 根据外部 for 循环当前行的值,将这些列添加到对应的那一行。
# Asker's attempt, reproduced as posted; it does not run as written.
# Intent: for each row of df_1, collect every df_2$Res whose Mat_2 equals that
# row's Mat_1, then store the collected values on that df_1 row.

# Result column on df_1, initialised to NA and then coerced to character.
df_1$Res_1 <- NA
df_1$Res_1 <- as.character(df_1$Res_1)
# Scratch character vector of length 10 that receives the matches.
df_1_1 <- rep(NA,10)
df_1_1 <- as.character(df_1_1)
# Index of the next free slot in df_1_1.
c = 1
for (p in 1:nrow(df_1)){
for(q in 1:nrow(df_2)){
# NOTE(review): the parentheses on the next line are unbalanced (two opened,
# one closed), so the snippet cannot be parsed exactly as shown.
if((df_1$Mat_1[p] == df_2$Mat_2[q]) {
df_1_1[c] <- df_2$Res[q]
c = c+1
}
# NOTE(review): the two statements below sit inside the inner loop, so they run
# once per df_2 row rather than once per df_1 row. t() also turns the scratch
# vector into a matrix, which is then transposed again on every iteration.
df_1_1 <- t(df_1_1)
# df_1$Res_1 is a single column (a vector), so the two-index form `[p,]` is
# what raises "incorrect number of subscripts on matrix".
df_1$Res_1[p,] <- df_1_1[] #not sure how to add the remaining #columns like Res_2 and Res_3 etc
}
# Reset the slot counter for the next df_1 row. Note that df_1_1 itself is not
# cleared here, so values from a previous row would remain in it.
c = 1
}
错误是
df_1$Res_1[p,] <- df_1_1[]:
incorrect number of subscripts on matrix
答案 0 :(得分:0)
据我所知,这就是您想要的:
library(reshape2)
library(dplyr)
# Build a wide lookup table from df2: one row per Mat_2 key, with the Res
# values belonging to that key spread over columns named Res_1, Res_2, ...
# numbered in the order the rows appear in df2.
df3 <- df2 %>%
  group_by(Mat_2) %>%
  # Running index of each row within its Mat_2 group; it becomes the suffix
  # of the output column name.
  mutate(group_no = row_number()) %>%
  # Drop the grouping as soon as the index exists so that the reshaping steps
  # below operate on a plain, ungrouped table.
  ungroup() %>%
  select(Mat_2, Res, group_no) %>%
  # Long format: one row per (Mat_2, group_no) with variable = "Res".
  melt(id.vars = c("Mat_2", "group_no")) %>%
  mutate(variable = paste(variable, group_no, sep = "_")) %>%
  select(-group_no) %>%
  # Back to wide format; name the value column explicitly instead of letting
  # dcast() guess it.
  dcast(Mat_2 ~ variable, value.var = "value")

# Attach the Res_* columns to every df1 row whose Mat_1 equals a Mat_2 key.
# df1 rows with no matching key keep NA in those columns.
left_join(df1, df3, by = c("Mat_1" = "Mat_2"))
# Mat_1 Col_1 Col_2 Col_3 Res_1 Res_2 Res_3
# 1 A d d d QQ TT YY
# 2 B o o o RR TT <NA>
# 3 C e e e QQ <NA> <NA>
# 4 A n n n QQ TT YY
# 5 E o o o FF <NA> <NA>
# 6 C t t t QQ <NA> <NA>
# 7 G m m m QQ <NA> <NA>
# 8 B a a a RR TT <NA>
# 9 A t t t QQ TT YY
# 10 X t t t <NA> <NA> <NA>
# 11 R e e e YY <NA> <NA>
# 12 A r r r QQ TT YY
# Warning message:
# Column `Mat_1`/`Mat_2` joining factors with different levels, coercing to character vector
使用此数据:
# Sample "first" dataset: a key column Mat_1 plus three filler columns.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df1 <- read.table(text = 'Mat_1 Col_1 Col_2 Col_3
A d d d
B o o o
C e e e
A n n n
E o o o
C t t t
G m m m
B a a a
A t t t
X t t t
R e e e
A r r r', header = TRUE)
# Sample "second" dataset: a row ID, the key column Mat_2, the Res value to be
# carried over to df1, and three filler columns.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df2 <- read.table(text = 'ID Mat_2 Res Col_1 Col_2 Col_3
1 A QQ i i i
2 C QQ t t t
3 V EE r r r
4 B RR e e e
5 G QQ a a a
6 A TT l l l
7 E FF l l l
8 B TT y y y
9 A YY d d d
10 P RR s o o
11 O EE e e e
12 O TT n n n
13 R YY o o o
14 T UU t t t
15 M OO m m m
16 M ZZ a a a
17 N VV t t t
18 J AA t t t
19 K SS e e e
20 L EE r r r', header = TRUE)