Elasticsearch Soundex匹配查询 - NEST

时间:2016-12-21 14:02:08

标签: elasticsearch logstash c#-2.0 nest soundex

有谁能想到为什么这可能不起作用?我基本上有两个字段,我使用soundex分析器进行索引,请参阅下面的配置,但是当我使用类似于索引中存储的名称进行搜索时,它不起作用。

anz.Custom("soundex_analyzer", dma => dma
                    .Tokenizer("keyword")
                    .Filters("lowercase", "icu_folding", "soundex_filter"));


tk.Phonetic("soundex_filter", ph => ph.Encoder(PhoneticEncoder.RefinedSoundex).Replace(false));

[String(Name = "surnameSoundex", Index = FieldIndexOption.Analyzed, Analyzer = "soundex_analyzer")]
public string SurnameSoundex { get; set; }

[String(Name = "forenameSoundex", Index = FieldIndexOption.Analyzed, Analyzer = "soundex_analyzer")]
public string ForenameSoundex { get; set; }


// Builds the phonetic name clause and appends it to _AndQueries.
// With no usable forename (null, empty, or shorter than 3 characters) only the surname is matched.
if (string.IsNullOrEmpty(oReq.person.ForenameSoundex) || oReq.person.ForenameSoundex.Length < 3)
  {
    _qc = _qd.Match(mt => mt.Field(fld => fld.SurnameSoundex).Query(oReq.person.SurnameSoundex));
    _AndQueries.Add(_qc);
    // The shared _qc field is cleared after the clause has been added.
    _qc = null;
  }
  else
     {
      // Search on surname, or on the combination of surname and forename.
      // C# gives && higher precedence than ||, so this parses as
      // surname || (surname && forename).
      // NOTE(review): presumably the overloaded operators build a bool query in which the
      // forename only influences scoring, since the surname alone already matches — confirm.
       _qc = _qd.Match(mt => mt.Field(fld => fld.SurnameSoundex).Query(oReq.person.SurnameSoundex))
                        || _qd.Match(mt => mt.Field(fld => fld.SurnameSoundex).Query(oReq.person.SurnameSoundex))
                        && _qd.Match(mt => mt.Field(fld => fld.ForenameSoundex).Query(oReq.person.ForenameSoundex));
       _AndQueries.Add(_qc);
       _qc = null;
       }

查询的构造本身没有问题(我已经检查过);这些查询会被添加到一个 QueryContainer 列表中,然后转换为数组传递给 bool 查询。

我不确定是不是不能在这个分析器中使用 keyword tokenizer。

提前致谢!

编辑:

基本上,我有一个 Person 类,它是一个通过特性(attribute)定义映射的 POCO:

/// <summary>
/// POCO describing a "person" document; the attributes on each property declare the
/// field name, whether the field is analyzed, and which named analyzer is used.
/// </summary>
/// <remarks>
/// The Name given in each attribute is the field name documents must use. Field names are
/// case-sensitive, so a document carrying "forenamesoundex" does not populate "forenameSoundex".
/// NOTE(review): "low_whit_analyzer" and "keyword_analyzer" are defined outside this class — confirm
/// they are registered in the index settings before the mapping is applied.
/// </remarks>
[ElasticsearchType(Name = "person", IdProperty = "id")]
public class Person
{
    // Document identifier; stored as a single un-analyzed term.
    [String(Name = "id", Index = FieldIndexOption.NotAnalyzed)]
    public string id { get; set; }

    // Forename as entered, analyzed with "low_whit_analyzer" at both index and search time.
    [String(Name = "forename", Index = FieldIndexOption.Analyzed, Analyzer = "low_whit_analyzer", SearchAnalyzer = "low_whit_analyzer")]
    public string forename { get; set; }

    // Forename again, but run through "soundex_analyzer" so it can be matched phonetically.
    [String(Name = "forenameSoundex", Index = FieldIndexOption.Analyzed, Analyzer = "soundex_analyzer", SearchAnalyzer = "soundex_analyzer")]
    public string forenameSoundex { get; set; }

    // Surname as entered, analyzed with "low_whit_analyzer" at both index and search time.
    [String(Name = "surname", Index = FieldIndexOption.Analyzed, Analyzer = "low_whit_analyzer", SearchAnalyzer = "low_whit_analyzer")]
    public string surname { get; set; }

    // Surname again, but run through "soundex_analyzer" so it can be matched phonetically.
    [String(Name = "surnameSoundex", Index = FieldIndexOption.Analyzed, Analyzer = "soundex_analyzer", SearchAnalyzer = "soundex_analyzer")]
    public string surnameSoundex { get; set; }

    // Date of birth, mapped as a date with the "date_optional_time" format.
    [Date(Name = "dob", Index = NonStringIndexOption.NotAnalyzed, Format = "date_optional_time")]
    public DateTime dob { get; set; }

    // Post code fields, analyzed with "keyword_analyzer".
    [String(Name = "postCode1", Index = FieldIndexOption.Analyzed, Analyzer = "keyword_analyzer", NullValue = null)]
    public string postCode1 { get; set; }

    [String(Name = "postCode2", Index = FieldIndexOption.Analyzed, Analyzer = "keyword_analyzer", NullValue = null)]
    public string postCode2 { get; set; }

    // External identifier, analyzed with "low_whit_analyzer".
    [String(Name = "identifier", Index = FieldIndexOption.Analyzed, Analyzer = "low_whit_analyzer", NullValue = null)]
    public string identifier { get; set; }

    // Contact details.
    [String(Name = "email", Index = FieldIndexOption.Analyzed, Analyzer = "keyword_analyzer", NullValue = null)]
    public string email { get; set; }

    [String(Name = "mobile", Index = FieldIndexOption.Analyzed, Analyzer = "low_whit_analyzer", NullValue = null)]
    public string mobile { get; set; }

    // Gender code, analyzed with "keyword_analyzer".
    [String(Name = "gender", Index = FieldIndexOption.Analyzed, Analyzer = "keyword_analyzer")]
    public string gender { get; set; }

    // Free-text notes and address lines; indexed without analysis.
    [String(Name = "notes", Index = FieldIndexOption.NotAnalyzed)]
    public string notes { get; set; }

    [String(Name = "address1", Index = FieldIndexOption.NotAnalyzed, NullValue = null)]
    public string address1 { get; set; }

    [String(Name = "address2", Index = FieldIndexOption.NotAnalyzed, NullValue = null)]
    public string address2 { get; set; }

    // Personal reference id, analyzed with "low_whit_analyzer".
    [String(Name = "personalReferenceId", Index = FieldIndexOption.Analyzed, Analyzer = "low_whit_analyzer")]
    public string personalReferenceId { get; set; }
}

然后我使用以下代码创建索引:

 // Build a client for the URL configured under the "ElasticSearchUrl" app setting;
 // Person documents default to the "people" index.
 Uri eSAddress = new Uri(ConfigurationManager.AppSettings["ElasticSearchUrl"]);
        _clientSettings = new ConnectionSettings(eSAddress)
            .MapDefaultTypeIndices(i => i.Add(typeof(Person), "people"));
        _client = new ElasticClient(_clientSettings);

        // Only create the "people" index when it does not exist yet.
        var oRequest = new IndexExistsRequest("people");
        var bIndexExists = _client.IndexExists(oRequest);

        if (bIndexExists.Exists == false)
        {
            // Create the index with its settings (including the custom token filters and
            // analyzers) and the Person mapping inferred from the attributes on the class.
            // NOTE(review): oIndexResponse is not inspected in the visible code — confirm
            // that a failed creation is detected somewhere.
            var oIndexResponse = _client.CreateIndex("people", c => c
             .Settings(st => st
                // NOTE(review): -1 presumably disables automatic refresh, in which case newly
                // indexed documents are not searchable until an explicit refresh — confirm.
                .RefreshInterval(-1)
                .Translog((ts) => SetupTranslogSettings(ts))
                .NumberOfShards(1)
                .NumberOfReplicas(0)
                    .Analysis(an => an
                        .TokenFilters((tf) => SetUpFilters(tf))
                        .Analyzers((anz) => SetUpAnalyzers(anz)
                 )))
                .Mappings(mp => mp.Map<Person>(m => m
                // AutoMap derives the mapping from the Person type; the _all field is switched off.
                .AutoMap()
                .AllField(al => al.Enabled(false)))));

然后我使用logstash使用以下配置从数据库导入我的记录:

statement => "SELECT IGF_UID AS id, IGF_FORENAME AS forename, IGF_SURNAME AS surname, IGF_FORENAME AS forenameSoundex, IGF_SURNAME AS surnameSoundex, 
              IGF_DATE_OF_BIRTH AS dob, IGF_POSTCODE1 AS postCode1, IGF_POSTCODE2 AS postCode2, IGF_NHS_NUMBER AS identifier, IGF_EMAIL AS email, 
              IGF_MOBILE AS mobile, (CASE IGF_SEX
                        WHEN 'male' THEN 'm'
                        WHEN 'female' THEN 'f'
                        WHEN 'transgender' THEN 't'
                        WHEN 'unknown' THEN 'u'
                        WHEN '' THEN NULL
                        ELSE IGF_SEX
                        END) AS gender, IGF_ADDRESS1 AS address1, IGF_ADDRESS2 AS address2 FROM dbo.IGT_PEOPLE"
   }
 }
  # Remove the two fields listed below from every event before it reaches the output.
  filter {
   mutate {
    remove_field => [ "@timestamp", "@version" ]
     }
    }

 # Send each event to the "people" index on localhost as a "person" document.
 output {
elasticsearch {
    hosts => "localhost"
    index => "people"
    document_type => "person"
    # The event's "id" field becomes the Elasticsearch document id.
    document_id => "%{id}"
    # NOTE(review): template management is switched off, presumably because the index and
    # its mapping are created from the C# client instead — confirm.
    manage_template => false
    template_overwrite => false
    }
}

我的分析器如下——注意我已经改用 Double Metaphone(双变音)token filter:

 /// <summary>
 /// Registers the custom analyzer named "soundex_analyzer" on the given descriptor.
 /// The analyzer uses the "keyword" tokenizer followed by the "soundex_filter" token filter.
 /// </summary>
 /// <param name="anz">Analyzers descriptor that receives the new analyzer.</param>
 public static void AddSoundexAnalyzer(ref AnalyzersDescriptor anz)
    {
        anz.Custom(
            "soundex_analyzer",
            custom => custom.Tokenizer("keyword").Filters("soundex_filter"));
    }

 /// <summary>
 /// Registers the phonetic token filter named "soundex_filter" on the given descriptor,
 /// configured with the Double Metaphone encoder and <c>Replace(true)</c>.
 /// </summary>
 /// <param name="tk">Token filters descriptor that receives the new filter.</param>
 public static void AddSoundexFilter(ref TokenFiltersDescriptor tk)
    {
        tk.Phonetic(
            "soundex_filter",
            phonetic => phonetic
                .Encoder(PhoneticEncoder.DoubleMetaphone)
                .Replace(true));
    }

然后我使用包含 must 和 should 子句的 bool 查询进行搜索,其中 should 子句应至少匹配一个查询。

  /// <summary>
  /// Builds the search descriptor for a name search: the clauses produced by
  /// <c>GetNameSearchClauses</c> are placed into a bool query (should / must) and the
  /// results are sorted by descending score.
  /// </summary>
  /// <param name="oReq">Search request carrying the person values and the search options.</param>
  /// <returns>
  /// The populated search descriptor, or <c>null</c> when no clause was generated for the request.
  /// </returns>
  public SearchDescriptor<Person> FuzzySearch(PersonSearchRequest oReq)
    {
        // Start from a clean slate on every call; these fields are shared state of the class.
        _s = new SearchDescriptor<Person>();
        _b = new BoolQueryDescriptor<Person>();
        _AndQueries = new List<QueryContainer>();
        _OrQueries = new List<QueryContainer>();

        GetNameSearchClauses(oReq, ref _OrQueries, ref _AndQueries);

        // No clause at all: keep the existing contract of returning null instead of an empty query.
        if (_OrQueries.Count == 0 && _AndQueries.Count == 0)
        {
            return null;
        }

        _b.Should(_OrQueries.ToArray());
        _b.Must(_AndQueries.ToArray());
        return _s.Query(qu => qu.Bool((z) => _b)).Sort(srt => srt.Descending(SortSpecialField.Score));
    }

然后我的forenameSoundex和surnameSoundex查询按以下方法构建:

  /// <summary>
  /// Appends the phonetic name clause for the request to <paramref name="_AndQueries"/>.
  /// Nothing is added unless soundex matching is enabled and the surname is longer than
  /// three characters.
  /// </summary>
  /// <param name="oReq">Search request carrying the person values and the search options.</param>
  /// <param name="_OrQueries">List of optional clauses; not modified by this method.</param>
  /// <param name="_AndQueries">List of required clauses; receives the name clause.</param>
  public void GetNameSearchClauses(PersonSearchRequest oReq, ref List<QueryContainer> _OrQueries, ref List<QueryContainer> _AndQueries)
    {
        if (oReq.searchParams.useSoundex != true)
        {
            return;
        }

        // Check for null/empty BEFORE reading Length: the previous version read
        // surnameSoundex.Length first and threw a NullReferenceException for a null surname.
        var surname = oReq.person.surnameSoundex;
        if (string.IsNullOrEmpty(surname) || surname.Length <= 3)
        {
            return;
        }

        var forename = oReq.person.forenameSoundex;

        if (string.IsNullOrEmpty(forename) || forename.Length < 3)
        {
            // No usable forename: search on the surname only.
            _qc = _qd.Match(mt => mt.Field(fld => fld.surnameSoundex).Query(surname));
        }
        else
        {
            // Search on surname, or on the combination of surname and forename.
            // The parentheses only make the existing precedence explicit (&& binds tighter than ||).
            _qc = _qd.Match(mt => mt.Field(fld => fld.surnameSoundex).Query(surname))
                || (_qd.Match(mt => mt.Field(fld => fld.surnameSoundex).Query(surname))
                    && _qd.Match(mt => mt.Field(fld => fld.forenameSoundex).Query(forename)));
        }

        _AndQueries.Add(_qc);
        // The shared _qc field is cleared once the clause has been handed over, as before.
        _qc = null;
    }

1 个答案:

答案 0 :(得分:2)

问题在于 logstash 的 jdbc 插件会自动把列名转换为小写。因此,虽然我在 SQL 语句中创建了与 elasticsearch 字段名称直接对应的别名,这些别名在经过 logstash 时还是被转换成了小写。

我的jdbc配置需要以下行: lowercase_column_names => false