匹配列标题并重命名以追加带列名称的字符串

时间:2017-11-01 17:00:55

标签: r

我想给dataframe1的各个列标题加上不同的字符串前缀。我有另一个dataframe2,其中包含列名以及要添加到对应列名前面的字符串。

如何在dataframe1中附加列名?

dataframe1:
id  C1_A C2_A C3_A C4_A C5_A
11   0     0    0    1    2 
12   0     3    2    1    0
13   2     0    0    2    3
14   0     0    2    1    1

dataframe2
C      S
C1_A   HP
C2_A   LP
C3_A   KP
C4_A   KP
C5_A   HP

Desired output dataframe1
id  HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
11     0       0       0       1       2
12     0       3       2       1       0
13     2       0       0       2       3
14     0       0       2       1       1

4 个答案:

答案 0 :(得分:2)

以下是使用stringr中的str_replace_all的另一种解决方案:

library(dplyr)
library(stringr)

# Build a named vector (pattern -> replacement) from df2 and use it to rewrite
# the column names of df1; the renamed copy of df1 is returned (and printed).
#
# Each pattern is wrapped as ^\Q...\E$ so that a key only matches a column
# name that is exactly equal to it. Without the anchors str_replace_all()
# treats every key as an unanchored regular expression, so a key such as "C1"
# would also rewrite part of "C11", and an already-prefixed name could be
# prefixed a second time by a later key.
df2 %>%
  mutate(S = paste(S, C, sep = "_")) %>%
  {setNames(.$S, paste0("^\\Q", .$C, "\\E$"))} %>%
  str_replace_all(names(df1), .) %>%
  setNames(df1, .)

结果:

  id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
1 11       0       0       0       1       2
2 12       0       3       2       1       0
3 13       2       0       0       2       3
4 14       0       0       2       1       1

数据:

# Example inputs, parsed from inline text.
# df1: the table whose columns are to be renamed (first column is `id`).
df1 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "  id  C1_A C2_A C3_A C4_A C5_A
                 11   0     0    0    1    2 
                 12   0     3    2    1    0
                 13   2     0    0    2    3
                 14   0     0    2    1    1
                 "
)

# df2: lookup table pairing each column name (C) with its prefix (S).
df2 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "C      S
                 C1_A   HP
                 C2_A   LP
                 C3_A   KP
                 C4_A   KP
                 C5_A   HP"
)

修改

@markdly指出,也可以改用下面的单行代码,这样就不需要依赖dplyr:

# Rename df1's columns in place using only stringr. Every key from df2$C is
# wrapped as ^\Q...\E$ so it is matched literally against the whole column
# name; an unanchored key would also rewrite any name that merely contains it.
names(df1) <- str_replace_all(
  names(df1),
  setNames(paste0(df2$S, "_", df2$C), paste0("^\\Q", df2$C, "\\E$"))
)

答案 1 :(得分:1)

library(dplyr)
library(tidyr)

# Example data frames (same values as in the question).
df1 <- read.table(text = "
id  C1_A C2_A C3_A C4_A C5_A
11   0     0    0    1    2 
12   0     3    2    1    0
13   2     0    0    2    3
14   0     0    2    1    1", header = TRUE)

df2 <- read.table(text = "
C      S
C1_A   HP
C2_A   LP
C3_A   KP
C4_A   KP
C5_A   HP", header = TRUE, stringsAsFactors = FALSE)


# Rename by reshaping: turn the column names into data, join the prefixes on,
# build the new names, then reshape back. pivot_wider() creates the new columns
# in order of first appearance, so the original column order is kept
# (spread() would sort the new columns alphabetically instead).
df1 %>%
  pivot_longer(-id, names_to = "C", values_to = "value") %>%  # column names become variable C
  left_join(df2, by = "C") %>%                                # attach the matching S value
  unite("S_C", S, C) %>%                                      # combine S and C into "S_C"
  pivot_wider(names_from = S_C, values_from = value) %>%      # back to the original wide form
  as.data.frame()                                             # plain data.frame, as before

#   id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
# 1 11       0       0       0       1       2
# 2 12       0       3       2       1       0
# 3 13       2       0       0       2       3
# 4 14       0       0       2       1       1

答案 2 :(得分:1)

另一种使用基础R的match的方法

# Work on copies of the question's inputs.
df1 <- dataframe1
df2 <- dataframe2

# Look up every current column name in df2$C. Names with no entry there
# (such as "id") are kept unchanged; the others get "<S>_" put in front.
nm <- names(df1)
pos <- match(nm, df2$C)
names(df1) <- ifelse(
  is.na(pos),
  nm,
  paste0(df2$S[pos], "_", nm)
)
df1

#>   id HP_C1_A LP_C2_A KP_C3_A KP_C4_A HP_C5_A
#> 1 11       0       0       0       1       2
#> 2 12       0       3       2       1       0
#> 3 13       2       0       0       2       3
#> 4 14       0       0       2       1       1

答案 3 :(得分:0)

使用基础R的非常简单的解决方案。

# Example inputs. df1 has no C3_A column here, and the first row of df2
# ("C", "S") looks like the lookup table's header captured as data; neither
# matters, because only names present in both tables are renamed.
df1 <- data.frame(id = 11:14, C1_A = c(0, 0, 2, 0),
                  C2_A = c(0, 3, 0, 0), C4_A = c(1, 1, 2, 1),
                  C5_A = c(2, 0, 3, 1))

df2 <- data.frame(col = c("C", "C1_A", "C2_A", "C3_A", "C4_A", "C5_A"),
                  comp = c("S", "HP", "LP", "KP", "KP", "HP"),
                  stringsAsFactors = FALSE)

#' Prefix names with the string paired to them in a lookup table
#'
#' @param nms Character vector of names to rewrite.
#' @param key Character vector of names that should receive a prefix.
#' @param prefix Character vector, parallel to `key`, holding the prefixes.
#' @param sep Separator placed between prefix and name.
#' @return `nms` with every element found in `key` replaced by
#'   `<prefix><sep><name>`; elements not found in `key` are returned unchanged.
#'   If a name occurs several times in `key`, its first entry is used.
prefix_matched_names <- function(nms, key, prefix, sep = "_") {
  # match() pairs each name with its own row of the lookup, so the result does
  # not depend on the lookup rows being in the same order as the names.
  pos <- match(nms, key)
  hit <- !is.na(pos)
  nms[hit] <- paste(prefix[pos[hit]], nms[hit], sep = sep)
  nms
}

colnames(df1) <- prefix_matched_names(colnames(df1), df2$col, df2$comp)