Elasticsearch:在嵌套对象中搜索单词的一部分

时间:2017-03-03 11:14:10

标签: elasticsearch nested

我无法在嵌套对象中按单词的一部分进行搜索,只能匹配到完整的单词。我的分析器配置如下:

{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "word_part_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 15
        },
        "word_part_front_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 15
        },
        "codeid_filter": {
          "type": "pattern_replace",
          "pattern": "[-/.:]",
          "replacement": "",
          "preserve_original": true
        }
      },
      "char_filter": {
        "umlaut_char_filter": {
          "type": "mapping",
          "mappings": [
            "ö=>oe",
            "ä=>ae",
            "ü=>ue",
            "ß=>ss",
            "Ö=>Oe",
            "Ä=>Ae",
            "Ü=>Ue"
          ]
        }
      },
      "analyzer": {
        "description_analyser_query": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding"
          ]
        },
        "description_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding",
            "word_part_filter"
          ]
        },
        "name_analyser_query": {
          "type": "custom",
          "char_filter": [
            "umlaut_char_filter"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        },
        "name_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "umlaut_char_filter"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "word_part_filter"
          ]
        },
        "codeid_analyser_query": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter"
          ]
        },
        "codeid_analyser_idx_front": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_front_filter"
          ]
        },
        "codeid_analyser_idx_any": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_filter"
          ]
        }
      }
    }
  }
}

这是嵌套对象映射(已提取):

{
  "properties": {    
    "aid": {
      "type": "nested",
      "properties": {
        "tpid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        },
        "aid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        }
      }    
    }
  }
}

我使用下面的查询进行搜索(节选)。其中只有 "nested" 部分是关键:

{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "nested": {
                  "path": "aid",
                  "query": {
                    "bool": {
                      "must": {
                        "match": {
                          "aid.aid": {
                            "query": "1200",
                            "analyzer": "codeid_analyser_query"
                          }
                        }
                      },
                      "filter": {
                        "or": [
                          {
                            "match": {
                              "aid.tpid": "buyer_specific"
                            }
                          },
                          {
                            "match": {
                              "aid.tpid": "mytpid"
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}

索引中有一个元素,其 aid = 120000008

在字段上使用分析器时,什么也找不到。在嵌套对象映射和查询中完全不使用分析器时,只能找到完整的单词(例如 "120000008"),但找不到 "1200"。有什么想法吗?

1 个答案:

答案 0 :(得分:5)

实际上,我使用 Elasticsearch 5.2,创建了名为 test 的索引,并在名为 "product" 的类型上应用了该映射(只重写了查询中的 filter 部分,使其符合查询语言的演变),得到了正确的结果。查询:

GET test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              {
                "nested": {
                  "path": "aid",
                  "query": {
                    "bool": {
                      "must": {
                        "match": {
                          "aid.aid": {
                            "query": "1200",
                            "analyzer": "codeid_analyser_query"
                          }
                        }
                      },
                      "filter": {
                        "terms": {
                          "aid.tpid": [
                            "mytpid",
                            "buyer_specific"
                          ]
                        }
                      }
                    }
                  }
                }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}

索引:

GET test/_search

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "test",
        "_type": "product",
        "_id": "AVrJ1CSd-NyeQ4r64kP6",
        "_score": 1,
        "_source": {
          "aid": {
            "aid": "120000008",
            "tpid": "mytpid"
          }
        }
      }
    ]
  }
}

分析器(我删除了变音符号字符过滤器,因为它在我的计算机上显示为乱码,而且在本测试中它不会影响结果):

PUT test
{
  "settings": {
     "analysis": {
      "filter": {
        "word_part_filter": {
          "type": "ngram",
          "min_gram": 3,
          "max_gram": 15
        },
        "word_part_front_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 15
        },
        "codeid_filter": {
          "type": "pattern_replace",
          "pattern": "[-/.:]",
          "replacement": "",
          "preserve_original": true
        }
      },

      "analyzer": {
        "description_analyser_query": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding"
          ]
        },
        "description_analyser_idx": {
          "type": "custom",
          "char_filter": [
            "html_strip"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "asciifolding",
            "word_part_filter"
          ]
        },

        "codeid_analyser_query": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter"
          ]
        },
        "codeid_analyser_idx_front": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_front_filter"
          ]
        },
        "codeid_analyser_idx_any": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "codeid_filter",
            "word_part_filter"
          ]
        }
      }
     }
  }
}

product 类型上的映射:

PUT test/_mapping/product
{

  "properties": {    
    "aid": {
      "type": "nested",
      "properties": {
        "tpid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        },
        "aid": {
          "type": "string",
          "analyzer": "codeid_analyser_idx_any"
        }
      }    
    }
  }
}