我有一个数据框,其列名如下面的 dput 输出所示。
# dput() output of the example data frame: 5 rows and 27 columns.
# The first five columns are mosales, sale123, totsales, totqty and unqsales;
# the remaining columns are named x<k>_rank_<j> (k = 1..5) and hold 0L/1L values.
# The expression is not assigned here; assign it (e.g. to `df`) to use it.
structure(list(mosales = c(1L, 1L, 1L, 12L, 1L), sale123 = c(14.86,
8.97, 6.44, 463.61, 15.94), totsales = c(1L, 1L, 1L,
30L, 1L), totqty = c(1L, 1L, 1L, 34L, 2L), unqsales = c(1L,
1L, 1L, 6L, 2L), x1_rank_1 = c(1L, 1L, 1L, 0L, 1L), x1_rank_4 = c(0L,
0L, 0L, 1L, 0L), x1_rank_3 = c(0L, 0L, 0L, 0L, 0L), x1_rank_2 = c(0L,
0L, 0L, 0L, 0L), x2_rank_2 = c(1L, 1L, 0L, 0L, 1L), x2_rank_1 = c(0L,
0L, 1L, 0L, 0L), x2_rank_5 = c(0L, 0L, 0L, 1L, 0L), x2_rank_4 = c(0L,
0L, 0L, 0L, 0L), x2_rank_3 = c(0L, 0L, 0L, 0L, 0L), x3_rank_1 = c(1L,
1L, 1L, 0L, 1L), x3_rank_4 = c(0L, 0L, 0L, 1L, 0L), x3_rank_3 = c(0L,
0L, 0L, 0L, 0L), x3_rank_2 = c(0L, 0L, 0L, 0L, 0L), x4_rank_1 = c(1L,
1L, 1L, 0L, 0L), x4_rank_5 = c(0L, 0L, 0L, 1L, 0L), x4_rank_2 = c(0L,
0L, 0L, 0L, 1L), x4_rank_4 = c(0L, 0L, 0L, 0L, 0L), x4_rank_3 = c(0L,
0L, 0L, 0L, 0L), x5_rank_1 = c(1L, 1L, 1L, 0L, 0L), x5_rank_4 = c(0L,
0L, 0L, 1L, 0L), x5_rank_2 = c(0L, 0L, 0L, 0L, 1L), x5_rank_3 = c(0L,
0L, 0L, 0L, 0L)), row.names = c(36L, 41L, 72L, 79L, 137L), class = "data.frame")
我想做的是把以 x1_rank、x2_rank 等开头的列名中的这些前缀替换掉:“x1_rank”应替换为 mosales,“x2_rank”应替换为 sales123,“x3_rank”应替换为 totsales,“x4_rank”应替换为 totqty,“x5_rank”应替换为 unqsales。
因此最终的列名称应如下所示:
mosales, sale123, totsales, totqty, unqsales, mosales_1, mosales_2,... sale123_1, sale123_2,... totsales_1, totsales_2,... totqty_1, totqty_2,... unqsales_1, unqsales_2,...
我尝试使用for循环和gsub,如下所示。这段代码运行时没有任何错误,但是我没有得到想要的东西。不确定错误在哪里。
# The asker's attempt: work on a copy of df and try to swap each "x<i>_rank"
# prefix for the i-th original column name.
df1 <- df
# z holds all column names of df; z[i] is used as the replacement text below.
z <- names(df)
# NOTE(review): i runs over every column name (length(z)), not just the
# x<i>_rank prefixes that actually appear in the names.
for (i in 1:length(z)){
# NOTE(review): gsub() returns a new character vector and does not modify its
# input; the result is not assigned, so names(df1) is never changed.
gsub(paste0("x",i,"_rank"), z[i], names(df1))
}
df是可以使用上述dput数据创建的数据框。
答案 0 :(得分:2)
stringi 包中的字符串操作函数对 pattern、replacement 以及(可选的)string 参数都是向量化的。这对于您的情况非常方便:
library(stringi)

# Prefixes to look for: "x1_rank" ... "x5_rank".
orig <- paste0("x", 1:5, "_rank")
# Replacement text for each prefix, in the same order as `orig`.
repl <- c("mosales", "sales123", "totsales", "totqty", "unqsales")

# vectorize_all = FALSE applies every pattern/replacement pair in turn to each
# column name, instead of pairing patterns with names element by element.
renamed <- stri_replace_all_fixed(
  names(df),
  pattern = orig,
  replacement = repl,
  vectorize_all = FALSE
)
names(df) <- renamed
答案 1 :(得分:1)
您可以使用这个正则表达式:\\bx1_rank[a-zA-Z]*。它会匹配字符串开头的 x1_rank。gsub 可以把匹配到的模式替换为所需的内容。对所有需要处理的情况重复上述操作即可。
循环中:
# Replacement prefixes, in the same order as the x1_rank ... x5_rank prefixes.
repl <- c("mosales", "sales123", "totsales", "totqty", "unqsales")

# NOTE(review): `d` is not defined in this snippet; presumably it is the data
# frame from the question (called `df` there) -- confirm before running.
# seq_along() ties the loop to the length of `repl` instead of a hard-coded 1:5.
for (i in seq_along(repl)) {
  # Pattern for the i-th prefix: "x<i>_rank" anchored at the start of the name,
  # followed by any run of ASCII letters.
  p <- paste0("\\b^x", i, "_rank[a-zA-Z]*")
  # gsub() returns the modified names; assign them back so the rename sticks.
  colnames(d) <- gsub(pattern = p, replacement = repl[i], x = colnames(d))
}

# Show the resulting column names.
colnames(d)
#
# [1] "mosales" "sale123" "totsales" "totqty" "unqsales" "mosales_1" "mosales_4"
# [8] "mosales_3" "mosales_2" "sales123_2" "sales123_1" "sales123_5" "sales123_4" "sales123_3"
# [15] "totsales_1" "totsales_4" "totsales_3" "totsales_2" "totqty_1" "totqty_5" "totqty_2"
# [22] "totqty_4" "totqty_3" "unqsales_1" "unqsales_4" "unqsales_2" "unqsales_3"