我想找到一种快速的方法,在R中搜索数组里的特定字符串,有点像Boggle游戏,只不过要找的单词是事先已知的。
您可以按以下方向移动字符串的下一个字母:向上,向下,向右或向左
比如说一个简单的例子,你有一个表格数组:
> G
A, Q, A, Q, Q,
A, Q, P, Q, Q,
Q, Q, P, L, Q,
Q, Q, Q, E, Q
并且您想要使用字符串APPLE
将函数应用于G时,如果此数组中存在APPLE,函数返回TRUE;如果不存在,则返回FALSE。
是否存在可以做到这一点的预制功能或包,或者有一种聪明的方法来做到这一点,我在处理R中的字符串方面相对较新,而我正在努力看到一种方式。
任何帮助非常感谢。感谢。
答案 0 :(得分:2)
这将首先检查你的单词中是否存在数组中不存在的任何字符,然后检查数组中的字符数是否足以满足单词中的重复字母
# Letters of the target word; strsplit() returns a one-element list here.
word <- strsplit("APPLE", "")
# Every letter available on the board, as a flat character vector.
pool <- c(
  "A", "Q", "A", "Q", "Q", "A", "Q", "P", "Q", "Q",
  "Q", "Q", "P", "L", "Q", "Q", "Q", "Q", "E", "Q"
)
# Per-letter counts for the pool and for the word.
t.pool <- table(pool)
t.word <- table(word)
# TRUE when every distinct letter of the word occurs in the pool.
all(names(t.word) %in% names(t.pool))
# TRUE when no letter is needed more often than the pool provides it.
all(t.pool[names(t.word)] - t.word >= 0)
最后两个表达式都会输出TRUE,表明word中的所有字母都存在于pool中,并且word中每个字母的出现次数都不超过它在pool中的出现次数。
写成函数形式:如果找到则输出TRUE,否则输出FALSE。
# Check whether `word` can be spelled from the letters in `pool`.
#
# word: a single character string.
# pool: character vector of available letters (one letter per element).
#
# Returns TRUE when every letter of `word` occurs in `pool` at least as many
# times as the word needs it, FALSE otherwise. Only letter counts are
# compared; the position of letters in `pool` is not taken into account.
word.find <- function(word, pool) {
  t.word <- table(strsplit(word, "")[[1]])
  t.pool <- table(pool)
  needed <- names(t.word)
  # `&&` short-circuits: the count comparison is only evaluated once every
  # needed letter is known to exist in the pool, so it never indexes the pool
  # table with a missing name.
  all(needed %in% names(t.pool)) &&
    all(as.vector(t.pool[needed]) >= as.vector(t.word))
}
word.find("APPLE", pool)
[1] TRUE
word.find("APPLES", pool)
[1] FALSE
word.find("APPLEE", pool)
[1] FALSE
答案 1 :(得分:2)
此功能仅使用基础R
功能
// NOTE(review): this fragment is Scala-style stream code, not R, and looks
// unrelated to the R answer around it (presumably a paste/extraction
// artifact) — confirm whether it belongs here.
// Connects ipStream with routeStream, keys both inputs by the constant 0,
// then applies MyRichCoFlatMapFunction to the connected stream.
ipStream
.connect(routeStream)
.keyBy(_ => 0, _ => 0)
.flatMap(new MyRichCoFlatMapFunction) // with ValueState[RoutingTable]
<strong>USAGE</strong>
# Find straight-line occurrences of a word in a character matrix.
#
# matrix_array:   character matrix with one letter per cell.
# word_to_search: single string; it is passed to gregexpr() as a regular
#                 expression, exactly as before.
#
# Each row and each column is collapsed into one string and searched for the
# word (RIGHT / DOWN) and for its reverse (LEFT / UP). Paths that change
# direction are not detected.
#
# Returns a data frame with character columns ROW, COLUMN and DIRECTION, one
# row per matching line and direction. For row matches COLUMN holds the start
# offset(s) joined by ", "; for column matches ROW holds them. For LEFT / UP
# the offsets are those of the reversed word. When nothing matches, a
# zero-row data frame with the same columns is returned.
search_string <- function(matrix_array, word_to_search) {
  word_to_search_inv <- paste(
    rev(strsplit(word_to_search, NULL)[[1]]),
    collapse = ""
  )

  # Start offsets of `pattern` in `text` as "a, b, ..."; NULL when absent.
  find_starts <- function(pattern, text) {
    hits <- gregexpr(pattern, text)[[1]]
    if (hits[1] == -1L) {
      return(NULL)
    }
    paste(hits, collapse = ", ")
  }

  # Search one collapsed row/column in both directions and return a list of
  # c(ROW, COLUMN, DIRECTION) records (forward match first, then reverse).
  scan_line <- function(text, index, is_row) {
    labels <- if (is_row) c("RIGHT", "LEFT") else c("DOWN", "UP")
    patterns <- c(word_to_search, word_to_search_inv)
    found <- list()
    for (k in seq_along(patterns)) {
      starts <- find_starts(patterns[k], text)
      if (is.null(starts)) {
        next
      }
      found[[length(found) + 1L]] <- if (is_row) {
        c(index, starts, labels[k])
      } else {
        c(starts, index, labels[k])
      }
    }
    found
  }

  # seq_len() keeps zero-row / zero-column input from iterating over 1:0.
  row_hits <- lapply(seq_len(nrow(matrix_array)), function(i) {
    scan_line(paste(matrix_array[i, ], collapse = ""), i, TRUE)
  })
  col_hits <- lapply(seq_len(ncol(matrix_array)), function(j) {
    scan_line(paste(matrix_array[, j], collapse = ""), j, FALSE)
  })
  records <- c(
    unlist(row_hits, recursive = FALSE),
    unlist(col_hits, recursive = FALSE)
  )

  # No match: return an empty result instead of placeholder NA rows.
  if (length(records) == 0L) {
    return(data.frame(
      ROW = character(0),
      COLUMN = character(0),
      DIRECTION = character(0),
      stringsAsFactors = FALSE
    ))
  }

  position <- as.data.frame(do.call(rbind, records), stringsAsFactors = FALSE)
  colnames(position) <- c("ROW", "COLUMN", "DIRECTION")
  rownames(position) <- NULL
  position
}
答案 2 :(得分:1)
添加另一种方法:
# 4 x 5 letter grid; the values are listed column by column.
board <- matrix(
  c(
    "A", "A", "Q", "Q",
    "Q", "Q", "Q", "Q",
    "A", "P", "P", "Q",
    "Q", "Q", "L", "E",
    "Q", "Q", "Q", "Q"
  ),
  nrow = 4L,
  ncol = 5L
)
# Word to look for on the board.
word <- "APPLE"
我们从以下代码开始:
matches = lapply(strsplit(word, NULL)[[1]], function(x) which(x == board, arr.ind = TRUE))
这是一个简单的 - 可能不可避免的 - 搜索“板”的索引,匹配单词的每个字母。它是一个包含行/列索引的“列表”,如:
#[[1]]
# row col
#[1,] 1 1
#[2,] 2 1
#[3,] 1 3
#
#[[2]]
# row col
#[1,] 2 3
#[2,] 3 3
#
##.....
有了这个,我们需要逐步找出每个元素中的索引是否在下一个元素中具有邻居(即右/左/上/下单元格)。例如。我们需要这样的东西:
as.matrix(find_neighbours(matches[[1]], matches[[2]], dim(board)))
# [,1] [,2]
#[1,] FALSE FALSE
#[2,] FALSE FALSE
#[3,] TRUE FALSE
告诉我们,matches[[1]]
的第3行是matches[[2]]
的第1行的邻居,即[1, 3]
和[2, 3]
确实是相邻的单元格。对于“匹配”中的每个连续元素,我们需要这个:
# For each pair of consecutive letters, collect the index pairs (i, j) for
# which row i of the current letter's matches is a neighbour of row j of the
# next letter's matches.
are_neighs <- Map(
  function(cur_cells, next_cells) {
    which(find_neighbours(cur_cells, next_cells, dim(board)), arr.ind = TRUE)
  },
  head(matches, -1L),
  tail(matches, -1L)
)
are_neighs
#[[1]]
# [,1] [,2]
#[1,] 3 1
#
#[[2]]
# [,1] [,2]
#[1,] 2 1
#[2,] 1 2
#
#[[3]]
# [,1] [,2]
#[1,] 2 1
#
#[[4]]
# [,1] [,2]
#[1,] 1 1
现在我们有了成对的(“i”和“i + 1”)邻居匹配,接下来需要把它们连成完整的链。对于此示例,我们希望得到一个像c(1, 2, 1, 1)这样的向量,它表示:are_neighs[[1]]的第1行与are_neighs[[2]]的第2行相链接,后者与are_neighs[[3]]的第1行相链接,后者又与are_neighs[[4]]的第1行相链接。这闻起来像是一个“igraph”问题,但我对它并不熟悉(希望有人有更好的想法),所以这里用一种朴素的方法来获得链接:
# Chain the pairwise links. Column k of row_connections holds, for every
# starting pair in are_neighs[[1]], the row of are_neighs[[k]] that continues
# its path; it becomes NA once the path cannot be extended.
row_connections <- matrix(NA_integer_, nrow(are_neighs[[1]]), length(are_neighs))
row_connections[, 1] <- seq_len(nrow(are_neighs[[1]]))
# `cur` tracks, per path, the index (into the next letter's matches) reached.
cur <- are_neighs[[1]][, 2]
# seq_len() yields an empty loop for a two-letter word (a single link);
# `1:(length(are_neighs) - 1)` would run over 1:0 and index out of bounds.
for (i in seq_len(length(are_neighs) - 1L)) {
  # match() returns only the first continuation of each path.
  im <- match(cur, are_neighs[[i + 1]][, 1])
  cur <- are_neighs[[i + 1]][, 2][im]
  row_connections[, i + 1] <- im
}
# Keep only the paths that could be extended through every letter.
row_connections <- row_connections[complete.cases(row_connections), , drop = FALSE]
返回:
row_connections
# [,1] [,2] [,3] [,4]
#[1,] 1 2 1 1
现在有了这个向量,我们可以从“are_neighs”中提取相应的链:
Map(function(x, i) x[i, ], are_neighs, row_connections[1, ])
#[[1]]
#[1] 3 1
#
#[[2]]
#[1] 1 2
#
#[[3]]
#[1] 2 1
#
#[[4]]
#[1] 1 1
可用于从“匹配”中提取适当的索引行/列链:
# Turn every complete chain in row_connections into a matrix of board
# positions (one row per letter of the word).
ans <- vector("list", nrow(row_connections))
# seq_len() makes the loop a no-op when no complete chain was found.
for (i in seq_len(nrow(row_connections))) {
  # The (i, j) index pair selected at each step of this chain.
  connect <- Map(function(x, i) x[i, ], are_neighs, row_connections[i, ])
  # Index of the first letter's match, followed by the index reached at
  # every later letter.
  idx <- c(connect[[1]][1], unlist(lapply(connect, "[", 2)))
  ans[[i]] <- do.call(rbind, Map(function(x, i) x[i, ], matches, idx))
}
ans
#[[1]]
# row col
#[1,] 1 3
#[2,] 2 3
#[3,] 3 3
#[4,] 3 4
#[5,] 4 4
将所有内容包装在函数中(find_neighbours
在内部定义):
library(Matrix)
# Trace a word through a letter board using up/down/left/right moves.
#
# word:  a single character string.
# board: character matrix with one letter per cell.
#
# Returns a list with one integer matrix per path found; each matrix has the
# columns "row" and "col" and one row per letter of the word, in order. An
# empty list is returned when the word cannot be traced (or is empty).
#
# Known limitation (unchanged): paths are chained with match(), which only
# follows the first possible continuation at each step, so not every
# possible path is returned. Cells are not prevented from being revisited.
ff <- function(word, board) {
  chars <- strsplit(word, NULL)[[1]]
  # Row/column positions of every occurrence of each letter of the word.
  matches <- lapply(chars, function(x) which(x == board, arr.ind = TRUE))

  if (length(matches) == 0L) {
    return(list())
  }
  # A one-letter word has no links to chain: every occurrence is a path.
  if (length(matches) == 1L) {
    only <- matches[[1]]
    return(lapply(seq_len(nrow(only)), function(i) only[i, , drop = FALSE]))
  }

  # Logical matrix with one row per row of `x` and one column per row of `y`;
  # TRUE where the two cells are orthogonal neighbours (Manhattan distance 1).
  # Both inputs hold positions of existing board cells, so no bounds check is
  # needed. `d` is kept so the helper can be called with the board
  # dimensions, as in find_neighbours(x, y, dim(board)); it is not used.
  # Note: the result is a dense nrow(x) by nrow(y) matrix.
  find_neighbours <- function(x, y, d) {
    row_gap <- abs(outer(unname(x[, 1]), unname(y[, 1]), "-"))
    col_gap <- abs(outer(unname(x[, 2]), unname(y[, 2]), "-"))
    (row_gap + col_gap) == 1L
  }

  d <- dim(board)
  # For consecutive letters k and k + 1: index pairs (i, j) such that match i
  # of letter k neighbours match j of letter k + 1 (column-major order).
  are_neighs <- Map(
    function(x, y) which(find_neighbours(x, y, d), arr.ind = TRUE),
    matches[-length(matches)],
    matches[-1]
  )

  n_links <- length(are_neighs)
  n_start <- nrow(are_neighs[[1]])
  # The first two letters are never adjacent: nothing to chain.
  if (n_start == 0L) {
    return(list())
  }

  # Column k holds, per starting pair, the row of are_neighs[[k]] that
  # continues its path (NA once the path cannot be extended).
  row_connections <- matrix(NA_integer_, n_start, n_links)
  row_connections[, 1] <- seq_len(n_start)
  cur <- are_neighs[[1]][, 2]
  # seq_len() gives an empty loop for two-letter words (a single link).
  for (k in seq_len(n_links - 1L)) {
    nxt <- are_neighs[[k + 1L]]
    im <- match(cur, nxt[, 1])
    cur <- nxt[im, 2]
    row_connections[, k + 1L] <- im
  }
  complete <- rowSums(is.na(row_connections)) == 0
  row_connections <- row_connections[complete, , drop = FALSE]

  # Convert every complete chain into the board positions it visits.
  lapply(seq_len(nrow(row_connections)), function(r) {
    links <- row_connections[r, ]
    reached <- vapply(
      seq_len(n_links),
      function(k) are_neighs[[k]][links[k], 2],
      integer(1)
    )
    idx <- c(are_neighs[[1]][links[1], 1], reached)
    do.call(rbind, Map(function(m, i) m[i, ], matches, idx))
  })
}
我们可以尝试一下:
ff("APPLE", board)
#[[1]]
# row col
#[1,] 1 3
#[2,] 2 3
#[3,] 3 3
#[4,] 3 4
#[5,] 4 4
当存在多个匹配时:
ff("AQQP", board)
#[[1]]
# row col
#[1,] 1 1
#[2,] 1 2
#[3,] 2 2
#[4,] 2 3
#
#[[2]]
# row col
#[1,] 1 3
#[2,] 1 2
#[3,] 2 2
#[4,] 2 3
#
#[[3]]
# row col
#[1,] 1 3
#[2,] 1 4
#[3,] 2 4
#[4,] 2 3
虽然它能够灵活地返回多个匹配,但并不会返回所有可能的匹配。简而言之,这是因为在构建邻居链时使用了match(它只取第一个匹配项);可以改用线性搜索,但目前这会显著增加代码的复杂度。
答案 3 :(得分:0)
我写了下面的内容,它运行良好,速度快,并且可以翻译成任何其他语言。
给定图G和字典,它搜索字典然后测试G是否有任何字母对应于它需要检查的每个单词的第一个字母。接下来,它检查由前一个TRUE值的TRUE值+ delta的索引找到的任何邻居是否等于该单词的第二个。而且这种情况还在继续。
如果在任何时候发现这不是TRUE,则函数结束并返回FALSE。此外,如果您按字母组合的“稀有性”对字典进行排序,则该功能将更快地运行。
# Check whether every word of a dictionary can be traced on a letter grid.
#
# dictionary: character vector of words.
# G:          character matrix with one letter per cell.
#
# A word is traced by starting on any cell holding its first letter and
# moving up, down, left or right to a cell holding the next letter, and so
# on. Only the set of cells reachable at each step is tracked, so a path is
# not prevented from revisiting a cell.
#
# Returns TRUE when every word can be traced (also for an empty dictionary),
# and FALSE as soon as one word cannot. An empty string counts as found.
dict_check <- function(dictionary, G) {
  n_rows <- nrow(G)
  n_cols <- ncol(G)
  # Positions of the grid inside a copy padded with one FALSE border cell on
  # every side; the border removes the need for explicit bounds checks.
  inner_rows <- seq_len(n_rows) + 1L
  inner_cols <- seq_len(n_cols) + 1L

  # Logical matrix marking the cells that have at least one TRUE cell of
  # `frontier` directly above, below, left or right of them.
  neighbour_mask <- function(frontier) {
    padded <- matrix(FALSE, n_rows + 2L, n_cols + 2L)
    padded[inner_rows, inner_cols] <- frontier
    padded[inner_rows - 1L, inner_cols, drop = FALSE] |
      padded[inner_rows + 1L, inner_cols, drop = FALSE] |
      padded[inner_rows, inner_cols - 1L, drop = FALSE] |
      padded[inner_rows, inner_cols + 1L, drop = FALSE]
  }

  # Cells of G equal to `letter`; NA cells never match.
  cells_with <- function(letter) {
    !is.na(G) & G == letter
  }

  # TRUE when `word` can be traced on G.
  word_found <- function(word) {
    W <- unlist(strsplit(word, ""))
    if (length(W) == 0L) {
      return(TRUE)
    }
    # Cells where a partial path covering the letters so far can end.
    frontier <- cells_with(W[1])
    for (letter in W[-1]) {
      if (!any(frontier)) {
        return(FALSE)
      }
      # Build the next frontier from scratch. Updating the current one in
      # place would let a cell that was just reached be cleared again when it
      # is later processed as an origin, losing valid paths.
      frontier <- neighbour_mask(frontier) & cells_with(letter)
    }
    any(frontier)
  }

  for (word in dictionary) {
    if (!word_found(word)) {
      return(FALSE)
    }
  }
  TRUE
}