我想给dataframe1的各个列名加上不同的字符串前缀。我有另一个dataframe2,其中包含列名(C)以及要添加的字符串(S)。
如何根据dataframe2修改dataframe1的列名?
dataframe1:
id C1_A C2_A C3_A C4_A C5_A
11 0 0 0 1 2
12 0 3 2 1 0
13 2 0 0 2 3
14 0 0 2 1 1
dataframe2
C S
C1_A HP
C2_A LP
C3_A KP
C4_A KP
C5_A HP
Desired output dataframe1
id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
11 0 0 0 1 2
12 0 3 2 1 0
13 2 0 0 2 3
14 0 0 2 1 1
答案 0 :(得分:2)
以下是使用stringr中的str_replace_all的另一种解决方案:
library(dplyr)
library(stringr)
# Prefix every column name of df1 with the matching S value from df2 and
# return the renamed copy (df1 itself is left untouched).
df2 %>%
  # Replacement text for each column: S prepended to the old name,
  # e.g. "HP_C1_A".
  mutate(S = paste(S, C, sep = "_")) %>%
  # Named vector for str_replace_all(): names are regex patterns, values are
  # the replacements. The patterns are anchored with ^ and $ so each one only
  # matches a complete column name; without the anchors a name that merely
  # contains another one (e.g. "C1_A2" vs "C1_A") would also be rewritten, and
  # an already renamed column could be rewritten again by a later pattern.
  # NOTE(review): the names in C are still interpreted as regular expressions,
  # so metacharacters such as "." would need escaping.
  {setNames(.$S, paste0("^", .$C, "$"))} %>%
  # Apply all patterns to the current column names of df1 ...
  str_replace_all(names(df1), .) %>%
  # ... and attach the resulting names to df1.
  setNames(df1, .)
**结果:**
id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
1 11 0 0 0 1 2
2 12 0 3 2 1 0
3 13 2 0 0 2 3
4 14 0 0 2 1 1
**数据:**
# Sample data: the data frame whose columns are to be renamed.
df1 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " id C1_A C2_A C3_A C4_A C5_A
11 0 0 0 1 2
12 0 3 2 1 0
13 2 0 0 2 3
14 0 0 2 1 1
"
)

# Lookup table: column name (C) and the string to prepend to it (S).
df2 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "C S
C1_A HP
C2_A LP
C3_A KP
C4_A KP
C5_A HP"
)
**修改:**
@markdly指出,也可以写成下面这一行代码,从而不必依赖dplyr:
# Same renaming without dplyr: replace the column names of df1 in place.
# The lookup vector maps regex pattern -> replacement. Each pattern is
# anchored with ^ and $ so it only matches a complete column name and cannot
# hit a longer name that contains it or a name that was already rewritten.
names(df1) <- str_replace_all(
  names(df1),
  setNames(paste0(df2$S, "_", df2$C), paste0("^", df2$C, "$"))
)
答案 1 :(得分:1)
library(dplyr)
library(tidyr)
# Example data frames.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.

# df1: the data frame whose columns are to be renamed.
df1 <- read.table(text = "
id C1_A C2_A C3_A C4_A C5_A
11 0 0 0 1 2
12 0 3 2 1 0
13 2 0 0 2 3
14 0 0 2 1 1", header = TRUE)

# df2: lookup table with the column name (C) and the string to prepend (S).
df2 <- read.table(text = "
C S
C1_A HP
C2_A LP
C3_A KP
C4_A KP
C5_A HP", header = TRUE, stringsAsFactors = FALSE)
# Rename the columns of df1 by reshaping to long form, joining the prefix
# from df2, and reshaping back. pivot_longer()/pivot_wider() replace the
# superseded gather()/spread(); unlike spread(), pivot_wider() keeps the new
# columns in order of first appearance, so the original column order is
# preserved instead of being sorted alphabetically.
df1 %>%
  # long form: one row per (id, original column name C, value)
  pivot_longer(-id, names_to = "C", values_to = "value") %>%
  # look up the string S that belongs to each column name
  left_join(df2, by = "C") %>%
  # build the new column name "<S>_<C>" (unite() separates with "_" by default)
  unite("S_C", S, C) %>%
  # back to wide form: one column per new name
  pivot_wider(names_from = S_C, values_from = value) %>%
  # return a plain data.frame, as the gather()/spread() version did
  as.data.frame()
#   id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
# 1 11       0       0       0       1       2
# 2 12       0       3       2       1       0
# 3 13       2       0       0       2       3
# 4 14       0       0       2       1       1
答案 2 :(得分:1)
另一种方法:使用基础R中的match:
# Work on copies named df1/df2.
df1 <- dataframe1
df2 <- dataframe2

# Position of every df1 column name in df2$C (NA when it is not listed).
pos <- match(names(df1), df2$C)
found <- !is.na(pos)

# Prepend the matching S value to the listed columns; all other names
# (e.g. "id") are left as they are.
names(df1)[found] <- paste0(df2$S[pos[found]], "_", names(df1)[found])
df1
#>   id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
#> 1 11       0       0       0       1       2
#> 2 12       0       3       2       1       0
#> 3 13       2       0       0       2       3
#> 4 14       0       0       2       1       1
答案 3 :(得分:0)
使用基础R的非常简单的解决方案。
# Example data: df1 deliberately has no C3_A column, so not every row of the
# lookup table has a matching column.
df1 <- data.frame(
  id = 11:14,
  C1_A = c(0, 0, 2, 0),
  C2_A = c(0, 3, 0, 0),
  C4_A = c(1, 1, 2, 1),
  C5_A = c(2, 0, 3, 1)
)

# Lookup table: column name (col) and the string to prepend (comp).
# NOTE(review): the first row ("C", "S") looks like the header of the original
# table copied in as data; it is harmless here because df1 has no column "C".
df2 <- data.frame(
  col = c("C", "C1_A", "C2_A", "C3_A", "C4_A", "C5_A"),
  comp = c("S", "HP", "LP", "KP", "KP", "HP"),
  stringsAsFactors = FALSE
)

# Columns of df1 that are listed in the lookup table.
idx2 <- colnames(df1) %in% df2$col

# For each of those columns, the row of df2 that describes it. Using match()
# pairs every column with its own prefix even when the rows of df2 are in a
# different order than the columns of df1; two independent %in% masks would
# only line up when both happen to be in the same order.
pos <- match(colnames(df1)[idx2], df2$col)

colnames(df1)[idx2] <- paste(df2$comp[pos], colnames(df1)[idx2], sep = "_")