Question

我有一个干净的字母输入流，想把它转换为由1到27的数字组成的向量（26个字母加上空格）。我不认为嵌套的for循环是完成这项工作的最佳方法。有没有可能不用循环来实现？

# Alphabet used for the encoding: "a".."z" map to 1..26 and " " maps to 27.
space_letters <- append(letters, " ")

# Convert a string of lowercase letters and spaces into integer codes.
#
# input_stream: character string(s); split into single characters before
#   encoding.
# Returns an integer vector with one code (1..27) per character; characters
#   that are not in `space_letters` yield NA.
text_to_numbers <- function(input_stream) {
  chars <- unlist(strsplit(input_stream, split = ""))

  # Collect codes in a separate integer vector so that a code written for one
  # character can never be re-read as input later on.
  codes <- rep(NA_integer_, length(chars))

  # seq_along() keeps the loops from running when there is nothing to encode.
  for (i in seq_along(chars)) {
    for (j in seq_along(space_letters)) {
      if (chars[i] == space_letters[j]) {
        codes[i] <- j
        # Each character occurs once in the alphabet; stop at the first hit.
        break
      }
    }
  }
  codes
}

Answer 1

这是使用match的base R替代方案：

# Convert text to letter codes: "a".."z" -> 1..26 and " " -> 27.
#
# ss: character vector; all of its elements are split into single characters
#   and encoded in order.
# Returns a numeric vector with one code per character; characters outside
#   the alphabet give NA.
text_to_numbers <- function(ss) {
  # match() is vectorised over its first argument, so the characters can be
  # looked up in one call instead of going through sapply(), whose return
  # shape depends on the input.
  as.numeric(match(unlist(strsplit(ss, "")), c(letters, " ")))
}

text_to_numbers("abcdef")
#[1] 1 2 3 4 5 6

text_to_numbers("the cat")
#[1] 20  8  5 27  3  1 20

或等效的purrr / stringr：

library(tidyverse)
# purrr/stringr version: encode "a".."z" as 1..26 and " " as 27.
#
# ss: text to encode; written with a single string in mind.
# Returns an integer vector with one code per character (NA when a character
#   is not in the alphabet).
text_to_numbers <- function(ss) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  map_int(str_split(ss, "", simplify = TRUE), ~ match(.x, c(letters, " ")))
}

Answer 2

按照问题的定义，另一种可能的解决方案是利用ASCII编码获得所需的数字代码。

v <- "AbcdefGHijklmnopqrstuvwxyz "

# Convert a single string to letter codes via the ASCII values of its
# lower-cased bytes: "a".."z" -> 1..26 and " " -> 27.
#
# x: one character string (charToRaw() works on a single string).
# Returns a numeric vector with one code per byte of `x`.
char_to_num <- function(x) {
  # "a" is ASCII 97, so subtracting 96 maps "a".."z" onto 1..26.
  codes <- as.numeric(charToRaw(tolower(x))) - 96
  # A space is ASCII 32, i.e. 32 - 96 = -64; give it code 27. Sub-assignment
  # keeps the result numeric even for "", where ifelse() returns logical(0).
  codes[codes == -64] <- 27
  codes
}

char_to_num(v)

# [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

Answer 3

这是将命名向量用作字典样式数据结构的一种base-R方式。您还可以使用tidyverse函数执行相同的操作，如下所示。

请注意，按照这里的写法，该函数假设您传入的是长度为1的字符向量；如果要传入更长的向量，则需要进行一些修改。

lipsum <- "lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum"

# Convert text to letter codes using a named vector as a dictionary.
#
# input_stream: character string(s) made up of lowercase letters and spaces.
# Returns an unnamed integer vector with one code per character
#   ("a".."z" -> 1..26, " " -> 27). A character that is not in the dictionary
#   raises a "subscript out of bounds" error.
text_to_numbers <- function(input_stream) {
  lookup <- 1:27
  names(lookup) <- append(letters, " ")
  chars <- unlist(strsplit(input_stream, split = ""))
  # vapply() pins the result type, so empty input gives integer(0) rather
  # than the empty list sapply() would return.
  vapply(chars, function(x) lookup[[x]], integer(1), USE.NAMES = FALSE)
}
text_to_numbers(lipsum)
#>   [1] 12 15 18  5 13 27  9 16 19 21 13 27  4 15 12 15 18 27 19  9 20 27  1
#>  [24] 13  5 20 27  3 15 14 19  5  3 20  5 20 21 18 27  1  4  9 16  9 19  3
#>  [47]  9 14  7 27  5 12  9 20 27 19  5  4 27  4 15 27  5  9 21 19 13 15  4
#>  [70] 27 20  5 13 16 15 18 27  9 14  3  9  4  9  4 21 14 20 27 21 20 27 12
#>  [93]  1  2 15 18  5 27  5 20 27  4 15 12 15 18  5 27 13  1  7 14  1 27  1
#> [116] 12  9 17 21  1 27 21 20 27  5 14  9 13 27  1  4 27 13  9 14  9 13 27
#> [139] 22  5 14  9  1 13 27 17 21  9 19 27 14 15 19 20 18 21  4 27  5 24  5
#> [162] 18  3  9 20  1 20  9 15 14 27 21 12 12  1 13  3 15 27 12  1  2 15 18
#> [185]  9 19 27 14  9 19  9 27 21 20 27  1 12  9 17 21  9 16 27  5 24 27  5
#> [208]  1 27  3 15 13 13 15  4 15 27  3 15 14 19  5 17 21  1 20 27  4 21  9
#> [231] 19 27  1 21 20  5 27  9 18 21 18  5 27  4 15 12 15 18 27  9 14 27 18
#> [254]  5 16 18  5  8  5 14  4  5 18  9 20 27  9 14 27 22 15 12 21 16 20  1
#> [277] 20  5 27 22  5 12  9 20 27  5 19 19  5 27  3  9 12 12 21 13 27  4 15
#> [300] 12 15 18  5 27  5 21 27  6 21  7  9  1 20 27 14 21 12 12  1 27 16  1
#> [323] 18  9  1 20 21 18 27  5 24  3  5 16 20  5 21 18 27 19  9 14 20 27 15
#> [346]  3  3  1  5  3  1 20 27  3 21 16  9  4  1 20  1 20 27 14 15 14 27 16
#> [369] 18 15  9  4  5 14 20 27 19 21 14 20 27  9 14 27  3 21 12 16  1 27 17
#> [392] 21  9 27 15  6  6  9  3  9  1 27  4  5 19  5 18 21 14 20 27 13 15 12
#> [415] 12  9 20 27  1 14  9 13 27  9  4 27  5 19 20 27 12  1  2 15 18 21 13

library(tidyverse)
# Tidyverse variant of the dictionary lookup: encode "a".."z" as 1..26 and
# " " as 27.
#
# input_stream: text to encode; it is split into single characters first.
# Returns an integer vector with one code per character.
tidy_text_to_numbers <- function(input_stream) {
  alphabet <- append(letters, " ")
  lookup <- set_names(1:27, alphabet)
  chars <- unlist(str_split(input_stream, ""))
  map_int(chars, ~ lookup[[.]])
}
tidy_text_to_numbers(lipsum)
#>   [1] 12 15 18  5 13 27  9 16 19 21 13 27  4 15 12 15 18 27 19  9 20 27  1
#>  [24] 13  5 20 27  3 15 14 19  5  3 20  5 20 21 18 27  1  4  9 16  9 19  3
#>  [47]  9 14  7 27  5 12  9 20 27 19  5  4 27  4 15 27  5  9 21 19 13 15  4
#>  [70] 27 20  5 13 16 15 18 27  9 14  3  9  4  9  4 21 14 20 27 21 20 27 12
#>  [93]  1  2 15 18  5 27  5 20 27  4 15 12 15 18  5 27 13  1  7 14  1 27  1
#> [116] 12  9 17 21  1 27 21 20 27  5 14  9 13 27  1  4 27 13  9 14  9 13 27
#> [139] 22  5 14  9  1 13 27 17 21  9 19 27 14 15 19 20 18 21  4 27  5 24  5
#> [162] 18  3  9 20  1 20  9 15 14 27 21 12 12  1 13  3 15 27 12  1  2 15 18
#> [185]  9 19 27 14  9 19  9 27 21 20 27  1 12  9 17 21  9 16 27  5 24 27  5
#> [208]  1 27  3 15 13 13 15  4 15 27  3 15 14 19  5 17 21  1 20 27  4 21  9
#> [231] 19 27  1 21 20  5 27  9 18 21 18  5 27  4 15 12 15 18 27  9 14 27 18
#> [254]  5 16 18  5  8  5 14  4  5 18  9 20 27  9 14 27 22 15 12 21 16 20  1
#> [277] 20  5 27 22  5 12  9 20 27  5 19 19  5 27  3  9 12 12 21 13 27  4 15
#> [300] 12 15 18  5 27  5 21 27  6 21  7  9  1 20 27 14 21 12 12  1 27 16  1
#> [323] 18  9  1 20 21 18 27  5 24  3  5 16 20  5 21 18 27 19  9 14 20 27 15
#> [346]  3  3  1  5  3  1 20 27  3 21 16  9  4  1 20  1 20 27 14 15 14 27 16
#> [369] 18 15  9  4  5 14 20 27 19 21 14 20 27  9 14 27  3 21 12 16  1 27 17
#> [392] 21  9 27 15  6  6  9  3  9  1 27  4  5 19  5 18 21 14 20 27 13 15 12
#> [415] 12  9 20 27  1 14  9 13 27  9  4 27  5 19 20 27 12  1  2 15 18 21 13

由reprex package（v0.2.0）于2018-10-01创建。

Answer 4

这是使用data.table的替代方案，其中的字典是一个通过setkey()设置了键的data.table。

library(data.table)

input_stream <- "lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum"

# Dictionary table: one row per character with its numeric code.
dict <- data.table(number = seq_len(27), letter = c(letters, " "))
# Key the table on `letter` so that rows can be looked up by character.
setkeyv(dict, "letter")

# Join the split characters against the keyed table and pull out the codes.
dict[strsplit(input_stream, ""), number]

# [1] 12 15 18  5 13 27  9 16 19 21 13 27  4 15 12 15 18 27 19  9 20 27
# [23]  1 13  5 20 27  3 15 14 19  5  3 20  5 20 21 18 27  1  4  9 16  9
# [45] 19  3  9 14  7 27  5 12  9 20 27 19  5  4 27  4 15 27  5  9 21 19
# [67] 13 15  4 27 20  5 13 16 15 18 27  9 14  3  9  4  9  4 21 14 20 27
# [89] 21 20 27 12  1  2 15 18  5 27  5 20 27  4 15 12 15 18  5 27 13  1
# [111]  7 14  1 27  1 12  9 17 21  1 27 21 20 27  5 14  9 13 27  1  4 27
# [133] 13  9 14  9 13 27 22  5 14  9  1 13 27 17 21  9 19 27 14 15 19 20
# [155] 18 21  4 27  5 24  5 18  3  9 20  1 20  9 15 14 27 21 12 12  1 13
# [177]  3 15 27 12  1  2 15 18  9 19 27 14  9 19  9 27 21 20 27  1 12  9
# [199] 17 21  9 16 27  5 24 27  5  1 27  3 15 13 13 15  4 15 27  3 15 14
# [221] 19  5 17 21  1 20 27  4 21  9 19 27  1 21 20  5 27  9 18 21 18  5
# [243] 27  4 15 12 15 18 27  9 14 27 18  5 16 18  5  8  5 14  4  5 18  9
# [265] 20 27  9 14 27 22 15 12 21 16 20  1 20  5 27 22  5 12  9 20 27  5
# [287] 19 19  5 27  3  9 12 12 21 13 27  4 15 12 15 18  5 27  5 21 27  6
# [309] 21  7  9  1 20 27 14 21 12 12  1 27 16  1 18  9  1 20 21 18 27  5
# [331] 24  3  5 16 20  5 21 18 27 19  9 14 20 27 15  3  3  1  5  3  1 20
# [353] 27  3 21 16  9  4  1 20  1 20 27 14 15 14 27 16 18 15  9  4  5 14
# [375] 20 27 19 21 14 20 27  9 14 27  3 21 12 16  1 27 17 21  9 27 15  6
# [397]  6  9  3  9  1 27  4  5 19  5 18 21 14 20 27 13 15 12 12  9 20 27
# [419]  1 14  9 13 27  9  4 27  5 19 20 27 12  1  2 15 18 21 13

Answer 5

我们还可以使用因子（factor）：

  # Split the sample text into single characters.
  input_stream <- "the cat"
  input_stream <- unlist(strsplit(input_stream, split = ""))
  # Factor codes are positions in `levels`, so "a".."z" -> 1..26, " " -> 27.
  as.numeric(factor(input_stream, levels = c(letters, " ")))
  # [1] 20  8  5 27  3  1 20

优化这个for循环（字母转数字）函数的最佳方法是什么？

5 个答案: