部分基于问题“ruby - how to generate the possible sequential combination of letters from an array of strings?”,我现在有一个用来匹配单词的程序,代码如下:
# In-memory word list that can be searched by phone-keypad digit strings.
class Dictionary
  # Keypad digit for every upper-case letter.
  MAPPINGS = {
    A: 2, B: 2, C: 2, D: 3, E: 3, F: 3, G: 4, H: 4, I: 4, J: 5, K: 5, L: 5,
    M: 6, N: 6, O: 6, P: 7, Q: 7, R: 7, S: 7, T: 8, U: 8, V: 8, W: 9, X: 9,
    Y: 9, Z: 9
  }.freeze

  # Same table keyed and valued by one-character strings ("A" => "2"), so a
  # word can be translated to its digits without symbol conversions.
  LETTER_DIGITS = MAPPINGS.each_with_object({}) do |(letter, digit), table|
    table[letter.to_s] = digit.to_s
  end.freeze

  # Words must be strictly shorter than this to be stored.
  # NOTE(review): this rejects 13-letter words such as MUMMIFICATION - confirm
  # whether the limit is still wanted.
  PHONE_NUMBER_LENGTH = 10

  def initialize
    @words = []
  end

  # @return [Array<String>] the stored words, upper-cased and without duplicates
  def words
    @words.map(&:upcase).uniq
  end

  # Stores +word+ when it is shorter than PHONE_NUMBER_LENGTH.
  #
  # @param word [String]
  # @return [Array<String>, nil] the internal word list, or nil when the word
  #   was rejected for being too long
  def add_word(word)
    @words << word if word.length < PHONE_NUMBER_LENGTH
  end

  # Adds every line of +words_file+ as a word.
  #
  # @param words_file [String] path to a file with one word per line
  # @return [Boolean] true when the file was read, false when it does not exist
  def load_system_dictionary(words_file = '/usr/share/dict/american-english')
    # File.foreach closes the file when done. The line terminator is removed
    # so it neither counts towards the length limit nor ends up in the word.
    File.foreach(words_file) { |line| add_word(line.chomp) }
    true
  rescue Errno::ENOENT
    false
  end

  # Expands a digit string into every letter sequence it can spell.
  #
  # @param letters [String] keypad digits
  # @return [Array<String>] all candidates; empty when +letters+ is empty or
  #   contains a digit without letters
  def word_combinations(letters)
    first, *rest = letters.each_char.map { |digit| letter_mappings(digit).chars }
    return [] if first.nil?

    # A single product call also handles one-digit input, where reducing with
    # product left plain strings that do not respond to #join.
    first.product(*rest).map(&:join)
  end

  # @param word [String] compared case-insensitively
  # @return [Boolean] whether +word+ is one of the stored words
  def contains_word(word)
    # Compare against whole words; joining the list into one string matched
    # substrings that span two neighbouring words.
    words.include?(word.upcase)
  end

  # Finds the stored words whose letters map onto +number_string+.
  #
  # Translating each stored word to digits takes one pass over the list,
  # whereas generating every letter combination grows exponentially with the
  # number of digits.
  #
  # @param number_string [String] keypad digits
  # @return [Array<String>] matching words, upper-cased, in alphabetical order
  #   (the order in which the generated combinations used to be visited)
  def word_from_word_combinations(number_string)
    return [] if number_string.empty?

    words.select { |word| digit_signature(word) == number_string }.sort
  end

  private

  # @return [String] the letters printed on the given digit, "" when none
  def letter_mappings(letter)
    MAPPINGS.select { |_key, val| val == letter.to_i }.keys.join
  end

  # @return [String, nil] the keypad digits for +word+, or nil when it
  #   contains a character that has no digit (e.g. an apostrophe)
  def digit_signature(word)
    digits = word.each_char.map { |char| LETTER_DIGITS[char] }
    digits.join unless digits.include?(nil)
  end
end
对于较短和中等长度的单词(例如“animal”,动物),它能在不到十分之一秒的时间内给出结果。但是对于 MUMMIFICATION 这类更长的单词,即
# Slow case from the question: a 13-digit number that should resolve to a
# single word. `dictionary` is defined outside this snippet (presumably a
# Dictionary instance with the word list loaded - confirm in the full spec).
it "should see that the valid words for 6866434228466 is 'MUMMIFICATION'" do
expect(dictionary.word_from_word_combinations('6866434228466')).to match_array(['MUMMIFICATION'])
end
测试需要30秒。
我已尝试在下面这行代码的每个阶段都添加 .uniq:
# Splits each entry of `possibles` into characters, combines the character
# lists pairwise with Array#product, and joins every resulting tuple into one
# string. (`possibles` comes from the surrounding method - assumed to hold one
# letter string per digit.)
possibles.map(&:chars).map(&:to_a).reduce(&:product).map(&:join)
我还把默认使用的 Ruby 版本换成了 ruby 2.0,但这反而让运行时间增加了 6 秒 :(
我现在改用sawa的方法:
# Separate the first character list from the others so that one
# Array#product call can combine it with all remaining lists at once.
first, *rest = possibles.map{|s| s.each_char.to_a}
# Each product tuple holds one character per list; join turns it into a string.
first.product(*rest).map(&:join)
即
# Expands a digit string into every letter sequence it can spell.
#
# Relies on `letter_mappings`, defined elsewhere, which is expected to return
# the letters for one digit as a single string.
#
# @param letters [String] keypad digits, one character per digit
# @return [Array<String>] every candidate string; empty for an empty input
def word_combinations(letters)
  letter_groups = letters.each_char.map do |one_letter|
    letter_mappings(one_letter).each_char.to_a
  end
  first, *rest = letter_groups
  # With no digits `first` is nil; report "no candidates" rather than raising
  # NoMethodError on nil.product.
  return [] if first.nil?

  first.product(*rest).map(&:join)
end
这确实有所帮助,运行时间已减少到 15 秒。
我还尝试了 Marshal 建议的 .map(&:chars) 写法,即
# Expands a digit string into every letter sequence it can spell, using
# String#chars to split each digit's letters.
#
# Relies on `letter_mappings`, defined elsewhere, which is expected to return
# the letters for one digit as a single string.
#
# @param letters [String] keypad digits, one character per digit
# @return [Array<String>] every candidate string; empty for an empty input
def word_combinations(letters)
  first, *rest = letters.each_char.map { |one_letter| letter_mappings(one_letter).chars }
  # An empty input yields no letter lists at all; return [] instead of
  # calling product on nil.
  return [] if first.nil?

  first.product(*rest).map(&:join)
end
这种写法很有意思,但并没有改善性能。
我还能尝试什么别的办法吗?
答案 0 :(得分:2)
# Every whitespace-separated entry of the system dictionary, upper-cased so it
# can be looked up in the upper-case keypad table below.
words = File.read("/usr/share/dict/american-english").split.map do |entry|
  entry.chomp.upcase
end

# Keypad digit for each letter, written with symbol keys for brevity.
keypad = {A: 2, B: 2, C: 2, D: 3, E: 3, F: 3, G: 4, H: 4, I: 4, J: 5, K: 5,
          L: 5, M: 6, N: 6, O: 6, P: 7, Q: 7, R: 7, S: 7, T: 8, U: 8, V: 8,
          W: 9, X: 9, Y: 9, Z: 9}
# The same table keyed by one-character strings, which is what String#chars
# produces.
digit_for = keypad.map { |letter, digit| [letter.to_s, digit] }.to_h

started_at = Time.now
# Translate each word to its digit string and keep the ones equal to the
# number being looked up.
matches = words.select do |word|
  word.chars.map { |char| digit_for[char] }.join == "6866434228466"
end
p matches
puts Time.now - started_at
结果:
["MUMMIFICATION"]
0.847988125
# Keypad digit for each letter, written with symbol keys for brevity.
keypad = {A: 2, B: 2, C: 2, D: 3, E: 3, F: 3, G: 4, H: 4, I: 4, J: 5, K: 5,
          L: 5, M: 6, N: 6, O: 6, P: 7, Q: 7, R: 7, S: 7, T: 8, U: 8, V: 8,
          W: 9, X: 9, Y: 9, Z: 9}
# The same table keyed by one-character strings, which is what String#chars
# produces.
digit_for = keypad.map { |letter, digit| [letter.to_s, digit] }.to_h

# Index built once up front: digit string => every upper-cased dictionary
# word that translates to it, in file order.
words_by_digits = {}
File.read("/usr/share/dict/american-english").split.each do |entry|
  word = entry.chomp.upcase
  digits = word.chars.map { |char| digit_for[char] }.join
  (words_by_digits[digits] ||= []) << word
end

# Only the hash lookup is timed; building the index is not.
started_at = Time.now
p words_by_digits["6866434228466"]
puts Time.now - started_at
结果:
["MUMMIFICATION"]
8.5981e-05
答案 1 :(得分:1)
在基准测试中,预演(Rehearsal)阶段基本上是为了预先分配内存,以便随后测量实际的实现。
可以看到它大约在 10 秒内运行完;如果只运行一次,则大约需要 15 秒。
require 'spec_helper'
require 'benchmark'
# Mixin that reads a word list from disk.
module SystemLoader
  # Reads +words_file+ and exposes its lines for iteration.
  #
  # The path is printed with `p` before reading, and a failure message is
  # printed when the file does not exist.
  #
  # @param words_file [String] path to a file with one entry per line
  # @return [Enumerator] the file's lines, line terminators included; an
  #   empty enumerator when the file does not exist
  def load_words(words_file)
    p words_file
    # File.readlines closes the file before returning, so no handle is left
    # open for the lifetime of the enumerator.
    File.readlines(words_file).each
  rescue Errno::ENOENT
    p "System loading failure"
    # `p` returns its argument, so without an explicit value the caller would
    # receive a String and fail when it tries to iterate the result.
    [].each
  end
end
# Word list loaded from a file, stored as a Hash for direct lookup.
class Dictionary
  include SystemLoader

  # @return [Hash{Symbol => String}] each entry keyed by its symbol form
  attr_reader :words

  # @param source [String] path handed to SystemLoader#load_words
  def initialize(source)
    @words = {}
    load_words(source).each do |line|
      # Drop the line terminator before using the entry as key and value.
      entry = line.chomp
      @words[entry.to_sym] = entry
    end
  end

  # @param word [Symbol] lookup key
  # @return [String, nil] the stored entry, or nil when it is absent
  def [](word)
    words[word]
  end
end
# Finds dictionary words that a phone number can spell on a keypad.
class PhoneNumberWordMapper
  # Letters for each keypad digit. 0 and 1 carry no letters and contribute an
  # empty string to every combination.
  MAPPING = {:"0" => [""],
             :"1" => [""],
             :"2" => ["A", "B", "C"],
             :"3" => ["D", "E", "F"],
             :"4" => ["G", "H", "I"],
             :"5" => ["J", "K", "L"],
             :"6" => ["M", "N", "O"],
             :"7" => ["P", "Q", "R", "S"],
             :"8" => ["T", "U", "V"],
             :"9" => ["W", "X", "Y", "Z"]}.freeze

  DEFAULT_OPTIONS = {
    phone_number_length: 10,
    source: "words.txt",
    mapping: MAPPING
  }.freeze

  # Longest number that get_possible_letters will expand.
  # NOTE(review): the previous `max_number_length ||= 124` assigned a local
  # variable, so 124 was always the effective limit and the
  # :phone_number_length option was never enforced. That behaviour is kept
  # because 13-digit lookups depend on it - confirm whether the option should
  # apply instead.
  NUMBER_LENGTH_LIMIT = 124

  attr_reader :dictionary, :mapping, :max_number_length

  # @param options [Hash] overrides for DEFAULT_OPTIONS
  #   (:phone_number_length, :source, :mapping)
  def initialize(options = {})
    options = DEFAULT_OPTIONS.merge(options)
    @dictionary = Dictionary.new(options[:source])
    @max_number_length = options[:phone_number_length]
    @mapping = options[:mapping]
  end

  # @param number [String] keypad digits
  # @return [Array<String>] upper-cased dictionary words spelled by +number+
  def word_from_word_combinations(number)
    word_combinations(number).each_with_object([]) do |letters, matches|
      word = letters.join
      matches << word.upcase if dictionary[word.to_sym]
    end
  end

  # Builds every letter sequence +number+ can spell.
  #
  # @param number [String] keypad digits
  # @param combinations [Array<Array<String>>] optional prefixes that already
  #   cover the first digit; when given, only the remaining digits are appended
  # @return [Array<Array<String>>] one array of letters per candidate; empty
  #   when +number+ is empty, longer than NUMBER_LENGTH_LIMIT, or contains a
  #   character without a mapping
  def word_combinations(number, combinations = [])
    first, *rest = get_possible_letters(number)
    # Nothing to expand: bail out instead of raising on nil.
    return [] if first.nil? || rest.include?(nil)
    # One product call covers any number of digits, including a single one.
    return first.product(*rest) if combinations.empty?

    combinations.product(*rest).map(&:flatten)
  end

  # @param number [String] keypad digits
  # @return [Array<Array<String>, nil>] the letters for each digit taken from
  #   the configured mapping (nil for an unmapped character); [] when +number+
  #   is longer than NUMBER_LENGTH_LIMIT
  def get_possible_letters(number)
    return [] if number.length > NUMBER_LENGTH_LIMIT

    # Read the configured mapping so a custom :mapping option takes effect.
    number.each_char.map { |digit| mapping[digit.to_sym] }
  end

  # Alias-style wrapper around get_possible_letters.
  def possibles(number)
    get_possible_letters(number)
  end
end
# Times one full lookup. The mapper is constructed inside the measured block,
# so building it is part of the reported time.
Benchmark.bmbm(7) do |bench|
  bench.report('testing word_from_word ') do
    PhoneNumberWordMapper.new.word_from_word_combinations('6866434228466')
  end
end
# Spec for the 13-digit lookup, using a mapper built with default options.
describe PhoneNumberWordMapper do
  it "should see that the valid words for 6866434228466 is 'MUMMIFICATION'" do
    mapper = PhoneNumberWordMapper.new
    found = mapper.word_from_word_combinations('6866434228466')
    expect(found).to match_array(['MUMMIFICATION' ])
  end
end
# Rehearsal -----------------------------------------------------------
# testing word_from_word "source: words.txt"
# "words.txt"
# 15.830000 0.450000 16.280000 ( 16.340899)
# ------------------------------------------------- total: 16.280000sec
#
# user system total real
# testing word_from_word "source: words.txt"
# "words.txt"
# 10.210000 0.150000 10.360000 ( 10.393033)
# "source: words.txt"
# "words.txt"
# .
#
# Finished in 10.06 seconds
# 1 example, 0 failures#
答案 2 :(得分:1)
如果像下面这样对字典做映射,在我的电脑上大约 2.8 秒就能得到结果。我使用 http://www.math.sjsu.edu/~foster/dictionary.txt 作为字典,并在末尾添加了 mummification(约 350000 个单词,每行一个)。这些单词全部是小写,每行一个。下面是完整的可运行脚本。如果要进行第二次查找,必须先倒带(rewind)以重新定位文件指针。
# Keypad digit for each lower-case letter.
@mapping = {a: 2, b: 2, c: 2, d: 3, e: 3, f: 3, g: 4, h: 4, i: 4, j: 5, k: 5,
            l: 5, m: 6, n: 6, o: 6, p: 7, q: 7, r: 7, s: 7, t: 8, u: 8, v: 8,
            w: 9, x: 9, y: 9, z: 9}
# Same table with one-character string keys and string digits ("a" => "2"),
# so characters read from the file can be looked up directly.
@digit_for = @mapping.each_with_object({}) do |(letter, digit), table|
  table[letter.to_s] = digit.to_s
end
# Enumerator over the dictionary's lines. File.foreach opens the file only
# while it is being iterated and closes it afterwards, so no handle stays
# open between lookups.
@words = File.foreach('dictionary.txt')

# Finds the dictionary words whose letters map onto +number+.
#
# Digits are compared as strings, and a word matches only when every one of
# its characters has a digit; words containing other characters (for example
# an apostrophe) and blank lines never match.
#
# @param number [String, Integer] keypad digits
# @return [Array<String>] matching words without their line terminators
def get_word(number)
  target = number.to_s
  # rewind is kept so the lookup also works when @words is replaced by an
  # enumerator over an already-open File, which must be repositioned before a
  # second pass.
  @words.rewind.each_with_object([]) do |line, found|
    word = line.chomp
    next if word.empty?

    digits = word.each_char.map { |char| @digit_for[char] }
    found << word if !digits.include?(nil) && digits.join == target
  end
end

# Example (requires dictionary.txt in the working directory):
#   get_word '6866434228466' # => ["mummification"]