从第二次遇到开始用R代码替换字符串中的字符值

时间:2018-07-12 13:13:39

标签: r regex rscript

我在R中有一些输入字符串。 输入字符串为:

  • abcde, abcdabcd, apapap

输出字符串应为:

  • abcde, abcdbcde, apbqcr

我需要执行此操作的R代码。我们应该能够遍历字符串,统计字符串中每个[a-z]字符的出现次数,并将重复出现的字符循环递增。也就是说 a + 1 = b,b + 2 = d,依此类推。数字应被忽略。

3 个答案:

答案 0 :(得分:1)

您可以这样做:

library(purrr)
library(magrittr)

# Sample inputs; the last two exercise repeated letters and the z -> a wrap.
vec <- c("abcde", "abcdabcd", "apapap", "aaaa", "zzzz")

# Lookup table: letter name -> position in the alphabet (a = 1, ..., z = 26).
letters2counts <- setNames(1:26, letters[1:26])

# Split every string into single characters (list named by the input string),
# then translate each character into its alphabet position.
chars_by_string <- sapply(vec, strsplit, split = "")
num_vec <- map(chars_by_string, function(chars) unname(letters2counts[chars]))

# For each string build a two-column matrix: column 1 holds the distinct
# codes, column 2 how many times each of them occurs in that string.
counts <- map(num_vec, function(codes) {
  tally <- table(codes)
  cbind(as.numeric(row.names(tally)), as.numeric(tally))
})

# Shift repeated letter codes: the k-th occurrence of a code (k = 1, 2, ...)
# is increased by k - 1, wrapping around so results stay within 1..26.
#
# x: vector of letter codes (1 = a, ..., 26 = z); NA entries are left as NA.
# n: two-column matrix; column 1 holds codes, column 2 the number of times
#    each code occurs in x.
# Returns a numeric vector of the same length as x.
fun1 <- function(x, n) {
  shifted <- x
  # Positions are looked up in the untouched input `x`, so codes that were
  # already shifted can never be mistaken for a later letter.
  for (code in unique(x[!is.na(x)])) {
    # sum() gives 0 when the code is absent from `n`, so the check below
    # never sees a zero-length condition.
    occurrences <- sum(n[n[, 1] %in% code, 2])
    if (occurrences == 0) next
    positions <- which(x %in% code)
    shifted[positions] <- x[positions] + 0:(occurrences - 1)
  }
  # Modulo on a zero-based value wraps any number of times past z,
  # not just once.
  (shifted - 1) %% 26 + 1
}

# Shift every string's codes, pairing each code vector with its own count table.
num_vec_calc <- mapply(fun1, x = num_vec, n = counts, SIMPLIFY = FALSE)

# Turn the shifted codes back into letters and glue them into one string each.
map(num_vec_calc, function(codes) {
  paste(names(letters2counts)[codes], collapse = "")
})

结果:

$abcde
[1] "abcde"

$abcdabcd
[1] "abcdbcde"

$apapap
[1] "apbqcr"

$aaaa
[1] "abcd"

$zzzz
[1] "zabc"

答案 1 :(得分:1)

# Replace each repeated letter of a string by a cyclically shifted letter:
# the k-th occurrence of a letter (k = 1, 2, ...) moves k - 1 places forward
# in the alphabet, wrapping from z back to a.
#
# x: a character string; only its first element is processed.
# Returns a single string. Characters that are not letters (digits,
# punctuation, ...) are copied through unchanged, and the empty string
# yields "". Letters are matched case-insensitively (base-36 parsing) and
# the shifted output is always lower case.
fun <- function(x) {
  chars <- strsplit(x, "")[[1]]            # split the string to letters
  codes <- strtoi(chars, base = 36L) - 9L  # a/A = 1, ..., z/Z = 26
  # Base-36 digits 0-9 land on values <= 0 and unparsable characters on NA;
  # neither is a letter.
  is_letter <- !is.na(codes) & codes >= 1L

  if (any(is_letter)) {
    letter_codes <- codes[is_letter]
    # Running count of each code: 1 for its first occurrence, 2 for the
    # second, and so on, computed in one pass instead of rescanning the prefix.
    nth <- ave(seq_along(letter_codes), letter_codes, FUN = seq_along)
    # Zero-based modulo, then back to 1-based indexing into `letters`.
    shifted <- (letter_codes + nth - 2L) %% 26L + 1L
    chars[is_letter] <- letters[shifted]
  }

  paste0(chars, collapse = "")
}

# fun("a")
# [1] "a"
# fun("zzz")
# [1] "zab"
# fun("abcde")
# [1] "abcde"
# fun("abcdabcd")
# [1] "abcdbcde"
# fun("apapap")
# [1] "apbqcr"

答案 2 :(得分:1)

# Cyclically increment repeated lower-case letters of a string: the k-th
# occurrence of a letter (k = 1, 2, ...) is moved k - 1 places forward in
# the alphabet, wrapping from z back to a.
#
# x: a character string; only its first element is processed.
# Returns a single string. Characters outside a-z (digits, upper case,
# punctuation, ...) are left untouched.
increment <- function(x) {
  chars <- strsplit(x, "")[[1L]]
  # Alphabet position of every character, NA for anything that is not a-z.
  # Positions are taken from the original string, so letters that were
  # already rewritten are never processed a second time.
  codes <- match(chars, letters)
  for (code in unique(codes[!is.na(codes)])) {
    hits <- which(codes == code)
    # Modulo operator tricky when indexing starts at 1: shift on a
    # zero-based value, then add 1 back.
    chars[hits] <- letters[(code + seq_along(hits) - 2L) %% 26L + 1L]
  }
  paste(chars, collapse = "")
}

# Apply increment() to every element of x and collect the results.
# NOTE(review): x is not defined in this snippet - presumably the character
# vector of input strings whose names appear in the output below; confirm.
sapply(x, increment)

     abcde   abcdabcd     apapap 
   "abcde" "abcdbcde"   "apbqcr"

请注意,第二个 z 会循环递增为 a:

# Same call on a longer sample; "zzzzzz" exercises the wrap from z back to a.
vec <- c("abcde", "abcdabcd", "apapap","aaaa","zzzzzz")
sapply(vec, increment)
     abcde   abcdabcd     apapap       aaaa     zzzzzz 
   "abcde" "abcdbcde"   "apbqcr"     "abcd"   "zabcde"