我想编写一个函数,将 Excel 列名转换为相应的数字。到目前为止,我写出的函数只能部分工作:字母表中靠前的字母先出现的输入("AB"、"AC" 等)工作正常,但反过来("BA"、"CA" 等)就不行。我已经把错误定位到了 y <- which(base::LETTERS==x) 这一行,但我真的不明白这些比较运算符是如何处理向量的。有什么建议吗?
#so to pass excel column-names directly, this function should do the trick
# Convert an Excel column name (e.g. "A", "Z", "BA") to its 1-based column
# number.
#
# input: a single character string made up of the letters A-Z (any case).
# Returns: a single numeric value, e.g. "A" -> 1, "Z" -> 26, "AA" -> 27,
#   "BA" -> 53.
# Errors: stops if `input` is not one non-empty string of letters A-Z.
LettersToNumbers <- function(input) {
  stopifnot(
    is.character(input),
    length(input) == 1L,
    !is.na(input),
    nchar(input) > 0L
  )
  # Split the string into single upper-case characters.
  chars <- strsplit(toupper(input), split = "")[[1]]
  # match() looks up the alphabet position of each character individually.
  # The previous `which(LETTERS == chars)` recycled `chars` along the 26
  # letters and compared element by element, so it only reported positions
  # that happened to line up (e.g. "BA" produced integer(0)).
  digits <- match(chars, LETTERS)
  if (anyNA(digits)) {
    stop("input must contain only the letters A-Z", call. = FALSE)
  }
  # The first letter is the most significant, so the place values run
  # 26^(n-1), ..., 26^0.
  sum(digits * 26^(rev(seq_along(digits)) - 1))
}
实际上,后来发现还有一些例子也不起作用。下面列出几个,我真的不明白发生了什么。
> which(LETTERS==c("A", "B"))
[1] 1 2
> which(LETTERS==c("A", "C"))
[1] 1
> which(LETTERS==c("A", "D"))
[1] 1 4
> which(LETTERS==c("D", "A"))
integer(0)
>
答案 0 :(得分:8)
这是一个简单粗糙的写法,但我认为它能得到你想要的结果。它应该适用于任意长度的字符串。
# Convert one Excel column name to its column number.
#
# s: a single string of letters A-Z (case-insensitive), e.g. "BA".
# Returns: the 1-based column number as a numeric scalar
#   ("A" -> 1, "Z" -> 26, "AA" -> 27, "BA" -> 53).
# Errors: stops if `s` is empty, NA, or contains a character outside A-Z.
LettersToNumbers <- function(s) {
  stopifnot(is.character(s), length(s) == 1L, !is.na(s))
  # Upper-case, then split the string into a vector of single letters.
  s_split <- strsplit(toupper(s), split = "")[[1]]
  # match() gives one alphabet position per letter and NA for anything that
  # is not in LETTERS. The earlier sapply(which(...)) returned a list in that
  # case, and the multiplication below then failed with an unrelated error.
  s_number <- match(s_split, LETTERS)
  if (length(s_number) == 0L || anyNA(s_number)) {
    stop("`s` must be a non-empty string of letters A-Z", call. = FALSE)
  }
  # Place value of each letter, most significant first: 26^(n-1), ..., 26^0.
  place_values <- 26^(rev(seq_along(s_number)) - 1)
  # Column number = sum of position * place value.
  sum(s_number * place_values)
}
# Vectorize in case you want to pass more than one column name in a single call.
# Vectorize() wraps the single-string function so that it is applied to each
# element of its argument; the wrapper replaces the original binding.
LettersToNumbers <- Vectorize(LettersToNumbers)
# Quick tests. Expected column numbers, in order: 1, 26, 27, 53, 703, and
# 1 through 26 for the full LETTERS vector.
LettersToNumbers("A")
LettersToNumbers("Z")
LettersToNumbers("AA")
LettersToNumbers("BA")
LettersToNumbers("AAA")
LettersToNumbers(LETTERS)
如上面的评论所述,您代码的主要问题是向量循环补齐(vector recycling):LETTERS == x 会把较短的 x 循环补齐到 26 个元素后逐位比较,而不是查找每个字母在字母表中的位置。此函数通过对每个字母分别查找(例如使用 sapply)来避免这个问题。
答案 1 :(得分:2)
这类情况下通常应使用 %in% 而不是 ==。例如:
# %in% tests each element of LETTERS for membership in the set, so nothing is
# recycled; which() then reports the matching positions in alphabetical order.
which(LETTERS %in% c("D", "A"))
结果为 1 4。但这不是你想要的顺序,因此可以像下面这样对每个字母逐个应用该函数:
# Look up each letter separately so the positions come back in input order;
# every lookup yields exactly one integer position.
vapply(c("D", "A"), function(letter) which(LETTERS %in% letter), integer(1))
结果为 4 1。
答案 2 :(得分:1)
先检查字符串的长度,然后根据各字母在 LETTERS 中的位置计算结果(有两个字母时,第一个字母的位置乘以 26,再加上第二个字母的位置):
# Convert a one- or two-letter Excel column name ("A" .. "ZZ") to its column
# number.
#
# chars: a single string of one or two letters; case is ignored.
# Returns: the column number, e.g. "A" -> 1, "Z" -> 26, "AA" -> 27, "BA" -> 53.
# Errors: stops when `chars` is not one string of one or two letters A-Z.
TwoLet2Num <- function(chars) {
  stopifnot(is.character(chars), length(chars) == 1L, !is.na(chars))
  # Upper-case first: lower-case input never equals an element of LETTERS, so
  # the lookup used to come back empty.
  chars <- toupper(chars)
  # Only one or two letters are supported; previously a third letter was
  # silently ignored.
  if (!(nchar(chars) %in% c(1L, 2L))) {
    stop("`chars` must contain one or two letters", call. = FALSE)
  }
  first <- match(substr(chars, 1, 1), LETTERS)
  second_char <- substr(chars, 2, 2)
  if (nchar(second_char) > 0) {
    # Two letters: the first counts in units of 26, the second in units of 1.
    res <- first * 26 + match(second_char, LETTERS)
  } else {
    res <- first
  }
  # match() returns NA for anything that is not a letter.
  if (is.na(res)) {
    stop("`chars` must contain only the letters A-Z", call. = FALSE)
  }
  res
}
答案 3 :(得分:1)
对于想要双向转换的人(都在一个函数中):输入数字(27),输出字母('AA');输入字母('AA'),输出数字(27)。
# Two-way converter between Excel column names and column numbers.
#
# col: either a character value holding a column name ("AA", any case) or a
#      single number (27).
# Returns: the column number for character input, or the upper-case column
#   name for numeric input. Reference points: 1 = A, 26 = Z, 27 = AA,
#   703 = AAA. A number that is not greater than 0 yields "".
# Errors: stops when character input is empty or contains anything other than
#   the letters A-Z.
xlcolconv <- function(col) {
  if (is.character(col)) {
    # Letters -> number; approach adapted from
    # https://stackoverflow.com/a/34537691/2292993
    # Upper-case, then split into a vector of single letters.
    chars <- unlist(strsplit(toupper(col), split = ""))
    # match() yields one alphabet position per character and NA for anything
    # that is not a letter, so bad input can be reported clearly instead of
    # failing later inside the arithmetic.
    positions <- match(chars, LETTERS)
    if (length(positions) == 0L || anyNA(positions)) {
      stop("`col` must be a non-empty string of letters A-Z", call. = FALSE)
    }
    # Place values, most significant letter first: 26^(n-1), ..., 26^0.
    place_values <- 26^(rev(seq_along(positions)) - 1)
    return(sum(positions * place_values))
  }

  # Number -> letters: repeatedly peel off the last letter.
  remaining <- col
  # Named `col_name` rather than `letters` to avoid masking base::letters.
  col_name <- ""
  while (remaining > 0) {
    # Subtract 1 before dividing: the "digits" are 1..26 (A..Z), with no zero.
    digit <- (remaining - 1) %% 26
    col_name <- paste0(intToUtf8(digit + utf8ToInt("A")), col_name)
    remaining <- (remaining - 1) %/% 26
  }
  col_name
}
答案 4 :(得分:0)
# Lookup table for every one- and two-letter Excel column name: the values are
# the column numbers 1..702 and the element names are "A".."Z", "AA".."ZZ" in
# sheet order (26 single letters + 26 * 26 pairs = 702 names).
converter <- seq_len(702)
# First character: "" for the single-letter names, then A..Z, each held for
# 26 consecutive names; second character: A..Z cycling underneath.
names(converter) <- paste0(rep(c("", LETTERS), each = length(LETTERS)), LETTERS)
# Column names to look up
ExcelColumnNames <- c("A", "Z", "AA", "AZ", "ZZ")
# Indexing by name returns the matching column numbers
converter[ExcelColumnNames]
# A Z AA AZ ZZ
# 1 26 27 52 702
答案 5 :(得分:0)
一种更快且本身已经向量化的替代方案(不需要 Vectorize):
# Vectorised conversion of Excel column names to column numbers.
#
# x: character vector of column names (letters A-Z, any case).
# Returns: a numeric vector with one column number per element of `x`; the
#   result is a numeric vector for zero-length input as well. Elements that
#   are NA or contain a character outside A-Z give NA.
letters2numbers <- function(x) {
  # Upper-case, then split every name into its single letters (a list holding
  # one character vector per input element).
  letter_list <- strsplit(toupper(x), split = "")
  # For each name, look up every letter's alphabet position and sum the
  # positions weighted by place value (26^(n-1) for the first letter down to
  # 26^0 for the last).
  # vapply() fixes the result type; sapply() returned list() when `x` was
  # empty.
  vapply(
    letter_list,
    function(xs) sum(match(xs, LETTERS) * 26^(rev(seq_along(xs)) - 1)),
    numeric(1)
  )
}
# Usage: a single name, then a vector of names. The "#>" lines show the
# printed result of the call above them.
letters2numbers("Z")
#> [1] 26
letters2numbers(c("A", "BZ", "CBA", "BDWGN"))
#> [1] 1 78 2081 1000000
基准:
# Benchmark 1: time both implementations on a single column name.
# NOTE(review): LettersToNumbers is presumably the Vectorize()-wrapped version
# from the earlier answer - confirm before comparing numbers.
microbenchmark::microbenchmark(
LettersToNumbers("Z"),
letters2numbers("Z")
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> LettersToNumbers("Z") 60.510 61.9065 70.23292 64.0005 67.957 262.051 100
#> letters2numbers("Z") 20.481 21.4115 26.70360 22.3420 24.204 140.568 100
# Benchmark 2: the same comparison on a vector of four column names.
microbenchmark::microbenchmark(
LettersToNumbers(c("A", "BZ", "CBA", "BDWGN")),
letters2numbers(c("A", "BZ", "CBA", "BDWGN"))
)
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> LettersToNumbers(c("A", "BZ", "CBA", "BDWGN")) 152.669 158.721 206.97909 171.7530 220.8595 581.819 100
#> letters2numbers(c("A", "BZ", "CBA", "BDWGN")) 30.255 32.582 42.47789 35.1425 43.9865 174.547 100