我正在尝试编写一个小脚本,帮助我在这款填字游戏应用中作弊,这样我就可以击败一直把我打得落花流水的妻子。
该应用会给出几个字母,你需要用这些字母拼出单词并填入填字游戏中。我想要一种快速的方法来缩小可能单词的范围。这是我目前的尝试:
library(tidyverse)
# Download the word list; read_lines() yields one element per line of the file.
dat <- read_lines(
  "https://raw.githubusercontent.com/dwyl/english-words/master/words.txt"
)
# From each entry, pull out every run of 2-6 characters drawn from "ilrfle"
# that sits between word boundaries, discard the entries that produced no
# match, and flatten what is left into a single character vector.
dat %>%
  str_extract_all("\\b[ilrfle]{2,6}\\b") %>%
  compact() %>%
  unlist()
#> [1] "el" "el" "fi" "life" "free" "fe" "rifle"
#> [8] "fire" "reef" "fire" "le" "relief" "relief" "le"
#> [15] "fere" "fell" "le" "fell" "er" "free" "fire"
#> [22] "fire" "free" "free" "fire" "fire" "fire" "fire"
#> [29] "reef" "life" "free" "eel" "free" "file" "fire"
#> [36] "refer" "eel" "free" "fire" "free" "re" "reef"
#> [43] "file" "free" "ee" "eel" "eel" "eel" "eeler"
#> [50] "eel" "eelier" "eel" "eel" "eel" "eel" "eel"
#> [57] "eer" "er" "eerie" "eerier" "ef" "eff" "effeir"
#> [64] "efl" "eir" "el" "el" "elf" "elf" "elf"
#> [71] "elf" "elf" "elf" "elf" "ell" "lil" "ell"
#> [78] "ell" "ell" "lil" "er" "erer" "erf" "erl"
#> [85] "err" "free" "free" "free" "free" "free" "fee"
#> [92] "fee" "feel" "feeler" "feere" "feerie" "fee" "fee"
#> [99] "fee" "fee" "feff" "fei" "feif" "feirie" "fele"
#> [106] "fell" "fell" "fell" "fell" "feel" "fer" "fer"
#> [113] "fer" "fere" "ferfel" "ferie" "ferlie" "ferr" "ferri"
#> [120] "ff" "fie" "fief" "fie" "fie" "fiel" "fieri"
#> [127] "fifer" "fife" "fifie" "fil" "file" "file" "file"
#> [134] "file" "file" "fili" "fili" "filii" "fill" "fill"
#> [141] "fill" "fille" "filler" "filler" "filler" "filli" "fill"
#> [148] "fill" "fill" "fill" "free" "fir" "fir" "fir"
#> [155] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [162] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [169] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [176] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [183] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [190] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [197] "fire" "fire" "fire" "fire" "free" "fire" "fire"
#> [204] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [211] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [218] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [225] "fire" "fire" "fire" "fire" "firer" "fire" "fire"
#> [232] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [239] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [246] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [253] "fire" "fire" "fire" "fire" "fire" "fire" "fire"
#> [260] "fire" "fir" "fir" "fir" "reel" "reeler" "flee"
#> [267] "fleer" "flier" "flier" "free" "fll" "ferri" "flrie"
#> [274] "le" "free" "lie" "fire" "fee" "free" "free"
#> [281] "free" "free" "free" "free" "free" "free" "free"
#> [288] "free" "free" "free" "free" "free" "free" "free"
#> [295] "free" "free" "free" "free" "free" "free" "free"
#> [302] "free" "free" "free" "free" "free" "free" "free"
#> [309] "free" "free" "free" "free" "free" "free" "free"
#> [316] "free" "free" "free" "free" "free" "free" "free"
#> [323] "free" "free" "free" "free" "free" "free" "free"
#> [330] "free" "free" "free" "free" "free" "free" "free"
#> [337] "free" "free" "free" "free" "free" "free" "free"
#> [344] "free" "free" "free" "free" "free" "free" "free"
#> [351] "free" "freir" "frier" "frill" "frill" "frill" "frill"
#> [358] "frill" "free" "life" "fill" "fire" "relief" "free"
#> [365] "ill" "ll" "fire" "fi" "flier" "le" "er"
#> [372] "free" "ie" "ie" "ier" "ier" "if" "fere"
#> [379] "iffier" "ifree" "ii" "iii" "il" "il" "ile"
#> [386] "ile" "ill" "ll" "ill" "ill" "ill" "ill"
#> [393] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [400] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [407] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [414] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [421] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [428] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [435] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [442] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [449] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [456] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [463] "ill" "ill" "iller" "ill" "ill" "ill" "ill"
#> [470] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [477] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [484] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [491] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [498] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [505] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [512] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [519] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [526] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [533] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [540] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [547] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [554] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [561] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [568] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [575] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [582] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [589] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [596] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [603] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [610] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [617] "ill" "ill" "ill" "ill" "ill" "ill" "ill"
#> [624] "ll" "re" "ir" "ire" "free" "ll" "file"
#> [631] "fee" "eel" "free" "le" "lee" "lee" "leef"
#> [638] "leer" "lee" "lei" "lere" "lie" "lie" "lie"
#> [645] "lie" "liefer" "lier" "lierre" "life" "life" "life"
#> [652] "life" "life" "life" "life" "life" "life" "life"
#> [659] "life" "life" "life" "life" "life" "life" "life"
#> [666] "life" "life" "life" "life" "life" "life" "life"
#> [673] "life" "life" "life" "life" "life" "life" "life"
#> [680] "life" "life" "life" "life" "life" "life" "life"
#> [687] "life" "life" "life" "lifer" "life" "life" "life"
#> [694] "life" "life" "life" "life" "life" "life" "life"
#> [701] "life" "life" "life" "life" "life" "life" "life"
#> [708] "life" "life" "life" "lile" "lill" "eleele" "lire"
#> [715] "ll" "ll" "ller" "le" "fer" "fire" "life"
#> [722] "le" "lie" "fire" "free" "er" "er" "er"
#> [729] "free" "fire" "el" "er" "reeler" "fire" "reel"
#> [736] "le" "relief" "free" "filler" "free" "fire" "free"
#> [743] "free" "free" "free" "free" "free" "free" "free"
#> [750] "fire" "firer" "filer" "fire" "firer" "re" "re"
#> [757] "re" "re" "re" "re" "re" "re" "re"
#> [764] "re" "re" "re" "re" "re" "re" "re"
#> [771] "re" "re" "re" "re" "re" "re" "re"
#> [778] "re" "re" "re" "re" "re" "re" "re"
#> [785] "re" "re" "re" "re" "re" "re" "re"
#> [792] "re" "re" "re" "re" "reefer" "re" "reef"
#> [799] "reef" "re" "re" "re" "re" "re" "re"
#> [806] "re" "re" "re" "reeler" "re" "re" "reel"
#> [813] "reel" "reel" "re" "re" "re" "re" "reel"
#> [820] "reel" "re" "re" "re" "re" "re" "re"
#> [827] "re" "re" "re" "re" "re" "re" "re"
#> [834] "re" "re" "re" "re" "re" "re" "re"
#> [841] "re" "re" "re" "re" "re" "re" "re"
#> [848] "re" "re" "re" "re" "re" "re" "re"
#> [855] "re" "re" "re" "re" "re" "re" "re"
#> [862] "re" "re" "re" "re" "re" "re" "re"
#> [869] "re" "re" "re" "re" "re" "re" "re"
#> [876] "re" "re" "re" "re" "re" "re" "re"
#> [883] "re" "re" "re" "re" "re" "re" "re"
#> [890] "re" "re" "re" "re" "re" "re" "re"
#> [897] "re" "re" "re" "re" "re" "re" "re"
#> [904] "re" "re" "re" "re" "re" "re" "re"
#> [911] "re" "re" "re" "re" "re" "re" "re"
#> [918] "re" "re" "re" "re" "re" "re" "re"
#> [925] "re" "re" "re" "re" "re" "re" "re"
#> [932] "re" "re" "re" "re" "re" "re" "re"
#> [939] "re" "re" "re" "re" "re" "re" "re"
#> [946] "re" "re" "re" "re" "re" "re" "re"
#> [953] "re" "re" "re" "re" "re" "re" "re"
#> [960] "re" "re" "re" "re" "re" "re" "re"
#> [967] "re" "re" "re" "re" "re" "re" "re"
#> [974] "re" "re" "re" "re" "re" "re" "re"
#> [981] "re" "re" "re" "re" "re" "re" "re"
#> [988] "re" "re" "re" "re" "re" "re" "re"
#> [995] "re" "re" "re" "re" "re" "re" "re"
#> [1002] "re" "re" "re" "re" "re" "re" "re"
#> [1009] "re" "re" "re" "re" "re" "ref" "ref"
#> [1016] "refeel" "refel" "refell" "refer" "refile" "refill" "refire"
#> [1023] "refl" "refl" "reflee" "re" "re" "re" "re"
#> [1030] "refr" "re" "re" "rei" "re" "reif" "re"
#> [1037] "rei" "rel" "rel" "re" "re" "re" "re"
#> [1044] "re" "re" "relief" "relief" "relier" "re" "free"
#> [1051] "re" "re" "re" "re" "re" "re" "re"
#> [1058] "re" "re" "re" "re" "re" "rere" "re"
#> [1065] "re" "rere" "re" "re" "re" "re" "re"
#> [1072] "re" "re" "re" "re" "re" "re" "re"
#> [1079] "re" "re" "re" "re" "re" "rere" "re"
#> [1086] "re" "reree" "rereel" "re" "refer" "re" "re"
#> [1093] "re" "re" "re" "re" "re" "re" "re"
#> [1100] "re" "re" "re" "re" "re" "re" "re"
#> [1107] "re" "re" "re" "re" "re" "re" "re"
#> [1114] "re" "re" "re" "re" "re" "re" "re"
#> [1121] "re" "re" "re" "re" "re" "re" "re"
#> [1128] "re" "re" "re" "re" "re" "re" "re"
#> [1135] "re" "re" "re" "re" "re" "re" "re"
#> [1142] "re" "re" "re" "re" "re" "re" "re"
#> [1149] "re" "rere" "re" "re" "re" "re" "re"
#> [1156] "re" "re" "re" "re" "re" "re" "re"
#> [1163] "re" "re" "re" "re" "re" "re" "re"
#> [1170] "re" "re" "re" "re" "re" "re" "re"
#> [1177] "re" "re" "re" "re" "re" "re" "rfree"
#> [1184] "free" "rier" "rife" "rifer" "riff" "rifle" "rifle"
#> [1191] "rifler" "rifle" "rifle" "rile" "rill" "rille" "rill"
#> [1198] "fire" "rle" "eel" "fill" "free" "fire" "fi"
#> [1205] "free" "er" "filler" "ill" "life" "free" "ll"
#> [1212] "free" "life" "lifer" "file" "fire" "ell" "free"
#> [1219] "ll" "ll" "free" "fire" "life" "fire" "free"
#> [1226] "free" "ll" "ll" "re" "ll" "ll" "free"
#> [1233] "life" "reel" "free" "free" "free" "ll" "free"
#> [1240] "free" "free" "free" "life" "re" "life" "free"
#> [1247] "le" "free" "free" "free" "ll" "li" "re"
#> [1254] "er" "eer" "ll" "re" "er" "er" "ll"
#> [1261] "re" "er" "er" "ll" "re" "er" "ll"
#> [1268] "re" "lie" "ll" "re" "fire" "eel" "free"
#> [1275] "free"
由reprex package(v0.2.1)于2019-01-17创建
数据来自一个包含约500,000个英语单词的词库。在此示例中,该应用给了我 ilrfle 这几个字母。
因此,我希望得到所有仅使用这些字母、且长度小于或等于6的单词。我该怎么做呢?我的问题是,它返回了一堆重复项,并且对于应用提供的某些字母组合也不起作用。
答案 0 :(得分:1)
我不确定自己是否赞成作弊——我为你的妻子加油!但这是一个有趣的问题,所以我就不计较这一点了 ;-)。
这是另一种方法。将 dat 中每个单词的字母按字母顺序排序;同样把输入集合中的字母按字母顺序排序。
然后就可以用一个简单的正则表达式在字典中查找这样的单词:其中每个字母出现的次数都不超过输入集合中该字母的个数。
library(tidyverse)
# Load the word list from GitHub into `dat`, one element per line of the file.
dat <- read_lines(
  "https://raw.githubusercontent.com/dwyl/english-words/master/words.txt"
)
# Return the dictionary words that can be spelled from a given set of letters.
#
# Arguments:
#   letters  A single string holding the available letters (e.g., "ilrfle").
#            A letter may be used at most as often as it occurs in this string.
#   words    Character vector of dictionary words to search. Defaults to the
#            global `dat`, so existing calls possible.words("ilrfle") still
#            work unchanged.
#
# Returns a character vector of the distinct matching words, in the order in
# which they first occur in `words`; character(0) when nothing matches.
possible.words <- function(letters, words = dat) {
  stopifnot(is.character(letters), length(letters) == 1L, !is.na(letters))
  if (!nzchar(letters)) {
    return(character(0))
  }

  # Sort with the locale-independent radix method so that the input letters
  # and every word are ordered by exactly the same rule.
  rack <- sort(strsplit(letters, "")[[1L]], method = "radix")

  # Filter to distinct, non-empty words that contain only letters in the set.
  # This step isn't strictly necessary, but it gives the later steps a smaller
  # list to process. Fixed-string removal is used rather than a "[...]" class
  # so that characters such as "]" or "^" in `letters` cannot break a pattern.
  candidates <- unique(words[!is.na(words) & nzchar(words)])
  leftover <- candidates
  for (ch in unique(rack)) {
    leftover <- gsub(ch, "", leftover, fixed = TRUE)
  }
  candidates <- candidates[!nzchar(leftover)]

  # For each candidate, its characters in sorted order ("life" -> "efil").
  # vapply() also copes with zero candidates, unlike a 1:nrow() loop.
  sorted_word <- vapply(
    strsplit(candidates, ""),
    function(chars) paste(sort(chars, method = "radix"), collapse = ""),
    character(1L)
  )

  # Build a pattern such as "^e?f?i?l?l?r?$": every input letter is optional
  # and appears in sorted order, so a sorted word matches only when it uses
  # each letter no more often than the input supplies it. Regex metacharacters
  # are backslash-escaped so they are matched literally.
  meta <- c(".", "\\", "|", "(", ")", "[", "{", "^", "$", "*", "+", "?")
  escaped <- ifelse(rack %in% meta, paste0("\\", rack), rack)
  pattern <- paste0("^", paste0(escaped, "?", collapse = ""), "$")

  candidates[grepl(pattern, sorted_word)]
}