我有一个带有列名(colnames)的 data.frame,列名如下:
# Example column names: one bare name followed by twelve numbered variants.
nam <- c("a", paste0("a_", seq_len(12L)))
"a" "a_1" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
如何将这些名称中的数字部分加 1?
预期结果将是
"a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
到目前为止,我的解决方案看起来非常复杂……是否有比下面这个函数更简单的方法?
#' Increment the first number embedded in each name by one
#'
#' This is the asker's own attempt from the question, which asks whether a
#' simpler way exists.
#'
#' The result is reassembled with `paste0()` rather than written back with
#' `substring<-`: the replacement form of `substring()` can never lengthen a
#' string, so a name such as "a_9" would come out as "a_1" instead of "a_10".
#'
#' @param nam Character vector of names, e.g. `c("a", "a_1", "a_2")`.
#' @return A character vector of the same length as `nam`. In every element
#'   that contains digits, the first run of digits is replaced by its value
#'   plus one; elements without digits (and `NA`s) are returned unchanged.
increment_names <- function(nam) {
  hit <- regexpr("\\d+", nam)
  # regexpr() yields -1 for "no match" and NA for NA input; skip both.
  has_num <- !is.na(hit) & hit > 0L
  if (!any(has_num)) {
    return(nam)
  }

  target <- nam[has_num]
  first <- hit[has_num]
  last <- first + attr(hit, "match.length")[has_num] - 1L

  # "%.0f" avoids scientific notation such as "1e+05" for large values.
  bumped <- sprintf("%.0f", as.numeric(substr(target, first, last)) + 1)

  # Rebuild prefix + new number + suffix so the string may grow (9 -> 10).
  nam[has_num] <- paste0(
    substr(target, 1L, first - 1L),
    bumped,
    substr(target, last + 1L, nchar(target))
  )
  nam
}
> increment_names(nam)
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
答案 0 :(得分:6)
使用基础 R 的 regmatches 的
解决方案:
# Position of the first run of digits in each element of `nam`.
r <- regexpr("\\d+", nam)
# Overwrite each matched run with its numeric value plus one.
regmatches(nam, r) <- as.character(as.numeric(regmatches(nam, r)) + 1)
nam
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" ...
答案 1 :(得分:4)
只要你的名称符合 "nonnumbers_numbers"(非数字在前、数字在后)的模式:
# Keep only the digits of each name; names without digits become "" -> NA.
nums <- as.numeric(gsub("[^0-9]", "", nam))
# For names that carried a number, glue the digit-free part to number + 1.
nam <- replace(
  nam,
  !is.na(nums),
  paste0(gsub("[0-9]", "", nam), nums + 1)[!is.na(nums)]
)
输出:
[1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12"
[13] "a_13"
答案 2 :(得分:4)
使用 gsubfn
包可以很简单地做到这一点:
# gsubfn() is provided by the gsubfn package, not base R.
library(gsubfn)
# For each run of digits in `nam`, the function receives the matched text and
# its result (the number plus one) is substituted; see the output below.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9" "a_10" "a_11" "a_12" "a_13"
这适用于任何模式,您不需要假设上面提到的 "nonnumbers_numbers" 模式,例如:
# Names with the number first ("1_a", "2_a", ...); the outer parentheses
# print the value as it is assigned.
(nam <- c("a", paste0(seq(12), "_a")))
## [1] "a" "1_a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a"
# Same call as for the "a_1" style names: the pattern only looks for digit
# runs, so their position inside the name does not matter.
gsubfn("\\d+", function(x) as.numeric(x) + 1, nam)
## [1] "a" "2_a" "3_a" "4_a" "5_a" "6_a" "7_a" "8_a" "9_a" "10_a" "11_a" "12_a" "13_a"
答案 3 :(得分:2)
你可以试试 "ore" 包,它的替换参数可以是一个函数,例如:
# Rebuild the example names: "a" plus "a_1" ... "a_12".
nam <- c("a", paste0("a_", seq(12)))
nam
# ore.subst() is provided by the ore package, not base R.
library(ore)
# The pattern is an optional minus sign followed by digits; each match is
# passed to the function and replaced by the number plus one. all = TRUE
# presumably replaces every match rather than just the first (confirm in the
# ore documentation).
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, nam, all = TRUE)
# [1] "a" "a_2" "a_3" "a_4" "a_5" "a_6" "a_7" "a_8" "a_9"
# [10] "a_10" "a_11" "a_12" "a_13"
这与 "gsubfn" 包的功能类似,但(至少在这种情况下)效率更高。以下是一些基准测试:
# stri_rand_strings() is provided by the stringi package.
library(stringi)
# Fix the RNG seed so the benchmark input is reproducible.
set.seed(1)
# Benchmark input: presumably 10000 random strings of length 5 drawn from the
# characters A-J and 0-9 (confirm argument order in the stringi docs).
nam <- stri_rand_strings(10000, 5, pattern = "[A-J0-9]")
# Benchmark wrapper for the "ore" approach.
# invec: character vector to process; defaults to the global `nam`.
# Returns the result of ore.subst(), which replaces each match of an optional
# minus sign plus digits with the matched number plus one.
f_ORE <- function(invec = nam) {
ore.subst("-?\\d+", function(x) as.numeric(x) + 1, invec, all = TRUE)
}
# Benchmark wrapper for the "gsubfn" approach.
# invec: character vector to process; defaults to the global `nam`.
# Returns the result of gsubfn(), which replaces digit runs with the matched
# number plus one.
f_GSUBFN <- function(invec = nam) {
gsubfn("\\d+", function(x) as.numeric(x) + 1, invec)
}
# Benchmark wrapper for the base-R regexpr()/regmatches() approach.
# invec: character vector to process; defaults to the global `nam`.
# Returns `invec` with the FIRST run of digits in each element replaced by
# its numeric value plus one; later digit runs are left untouched.
f_BASE <- function(invec = nam) {
  first_run <- regexpr("\\d+", invec)
  bumped <- as.numeric(regmatches(invec, first_run)) + 1
  regmatches(invec, first_run) <- bumped
  invec
}
# The gsubfn version is timed once with system.time() instead of being
# included in the microbenchmark run below; its recorded time is in seconds.
system.time(f_GSUBFN())
# user system elapsed
# 5.48 0.01 5.50
# microbenchmark() is provided by the microbenchmark package.
library(microbenchmark)
# Repeated timing of the base-R and ore versions (neval = 100 in the output).
microbenchmark(f_BASE(), f_ORE())
# Unit: milliseconds
# expr min lq mean median uq max neval
# f_BASE() 141.79743 149.58914 161.49041 152.81038 162.10550 357.6483 100
# f_ORE() 57.35309 59.58433 65.84678 60.92218 68.40062 116.7714 100
请注意,虽然 "ore" 方法和 "gsubfn" 方法的结果完全相同,但它们的行为似乎与基础 R 方法略有不同。
考虑:
> identical(f_ORE(), f_GSUBFN())
[1] TRUE
## Edge case...
> nam[988]
[1] "0G019"
> f_ORE()[988] ## 019 becomes 20 (without the leading zero)
[1] "1G20"
> f_GSUBFN()[988] ## Same
[1] "1G20"
> f_BASE()[988] ## This seems off...
[1] "1G019"