我正在使用Elasticsearch在轨道上使用红宝石。我使用
Elastic::Model::JobPosting.recreate_index!
进行索引。我得到的错误:
{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"In Shingle TokenFilter the difference between max_shingle_size and min_shingle_size (and +1 if outputting unigrams) must be less than or equal to: [3] but was [4].
但是在进一步检查带状疱疹值时,我发现:
min_shingle_size: 2,
max_shingle_size: 5,
所以它们之间的区别是“ 3”,但是输出为何变成“ 4”。我似乎无法理解其原因
我的模块/弹性/实用程序代码是:
module ::Elastic::Utils
def self.suggestion_field
{
analyzer: :name_analyzer,
type: "text",
fields: {
:raw => {type: "keyword", index: true}
}
}
end
def self.optimized_suggestion_field
{
analyzer: :suggest_analyzer,
type: "text"
}
end
def self.optimized_settings
{
index: {
number_of_shards: 2,
number_of_replicas: 1
},
analysis: {
analyzer: {
suggest_analyzer: {
tokenizer: 'keyword',
type: 'custom',
filter: %w(suggest_filter)
}
},
filter: {
suggest_filter: {
type: 'edgeNGram',
max_gram: 40,
min_gram: 1
}
},
}
}
end
def self.common_settings
{
index: {
number_of_shards: 2,
number_of_replicas: 1
},
analysis: {
analyzer: {
name_analyzer: {
tokenizer: 'whitespace',
type: 'custom',
filter: %w(lowercase multi_words name_filter)
},
lower_keyword: {
tokenizer: 'keyword',
type: 'custom',
filter: ['lowercase']
},
},
filter: {
multi_words: {
type: 'shingle',
min_shingle_size: 2,
max_shingle_size: 5
},
name_filter: {
type: 'edgeNGram',
max_gram: 40,
min_gram: 1
}
},
}
}
end
def self.normalized_terms(name)
terms = []
terms = name.split(/\W+/) if name.present?
normalized_terms = []
norm_term = ''
terms.reverse.each do |term|
norm_term = "#{term}#{norm_term}"
normalized_terms << norm_term
end
normalized_terms.reverse
end
def self.normalize(name)
return nil if name.blank?
name.downcase.gsub(/[^[:alnum:]]/, ' ').
gsub(' and ', ' ').gsub(' of ', ' ').gsub(' in ', ' ').
gsub('engineering', 'engg').gsub('technology', 'tech').
gsub('bachelor ', 'b ').gsub('master ', 'm ').
squish
end
end