Elasticsearch排序字段异常

时间:2018-11-15 20:26:33

标签: elasticsearch kibana dsl

我正在尝试按某些字段(firstName 和 lastName)对列表进行排序,但我注意到结果有些不一致。

我正在运行一个简单的查询

//Return all the employees from a specific company ordering by lastName asc | desc

GET employee-index-sorting
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "companyId": 3179
        }
      }
    }
  },
  "sort": [
    {
      "lastName.keyword": { <-- Should this be keyword? or not_analyzed
        "order": "desc"
      }
    }
  ]
}

结果如下,为什么 van der Mescht 和 van Breda 排在 Zwane 和 Zwezwe 之前?

我怀疑我的映射有问题

{
        "_index": "employee-index",
        "_type": "_doc",
        "_id": "637467",
        "_score": null,
        "_source": {
          "companyId": 3179,
          "firstName": "Name",
          "lastName": "van der Mescht",
        },
        "sort": [
          "van der Mescht"
        ]
      },
      {
        "_index": "employee-index",
        "_type": "_doc",
        "_id": "678335",
        "_score": null,
        "_source": {
          "companyId": 3179,
          "firstName": "Name3",
          "lastName": "van Breda",
        },
        "sort": [
          "van Breda"
        ]
      },
      {
        "_index": "employee-index",
        "_type": "_doc",
        "_id": "113896",
        "_score": null,
        "_source": {
          "companyId": 3179,
          "firstName": "Name2",
          "lastName": "Zwezwe",
        },
        "sort": [
          "Zwezwe"
        ]
      },
      {
        "_index": "employee-index",
        "_type": "_doc",
        "_id": "639639",
        "_score": null,
        "_source": {
          "companyId": 3179,
          "firstName": "Name1",
          "lastName": "Zwane",
        },
        "sort": [
          "Zwane"
        ]
      }

映射

这里贴出完整的映射,因为我不确定其中是否还有其他问题。

我应该如何更改lastName和firstName属性,以便对其进行排序?

PUT employee-index-sorting
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {},
        "analyzer": {
          "keyword_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 5,
            "token_chars": [
              "letter"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        "employeeId": {
          "type": "keyword"
        },
        "companyGroupId": {
          "type": "keyword"
        },
        "companyId": {
          "type": "keyword"
        },
        "number": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "preferredName": {
          "type": "text",
          "index": false
        },
        "firstName": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "middleName": {
          "type": "text",
          "index": false
        },
        "lastName": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "fullName": {
          "type": "text",
          "fields": {
            "keywordstring": {
              "type": "text",
              "analyzer": "keyword_analyzer"
            },
            "edgengram": {
              "type": "text",
              "analyzer": "edge_ngram_analyzer",
              "search_analyzer": "edge_ngram_search_analyzer"
            }
          },
          "analyzer": "standard"
        },
        "terminationDate": {
          "type": "date"
        },
        "companyName": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "email": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "idNumber": {
          "type": "text"
        },
        "description": {
          "type": "text",
          "index": false
        },
        "jobNumber": {
          "type": "keyword"
        },
        "frequencyId": {
          "type": "long"
        },
        "frequencyCode": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "frequencyAccess": {
          "type": "boolean"
        }
      }
    }
  }
}

1 个答案:

答案 0 :(得分:7)

对于排序,您需要使用lastName.keyword,这是正确的,无需在那里进行任何更改。

van der Mescht 和 van Breda 排在 Zwane 和 Zwezwe 之前的原因是:字符串的排序是按字典序进行的,基本上依据 ASCII 表,其中大写字符排在小写字符之前,因此升序时以大写字母开头的词排在以小写字母开头的词之前。但是,由于您是以 desc 模式进行排序,因此顺序恰恰相反:

  • z...
  • ...
  • van der Mescht
  • ...
  • van Breda
  • ...
  • a...
  • ...
  • Zwezwe
  • ...
  • Zwane
  • ...
  • Z...
  • ...
  • A...

要解决此问题,您只需在 lastName.keyword 字段上添加一个 normalizer,即把映射改成下面这样即可(注意:已有字段的 normalizer 无法直接修改,需要用新映射重建索引并重新索引数据):

{
  "settings": {
    "index": {
      "analysis": {
        "filter": {},
        "analyzer": { 
          ...
        },
        "tokenizer": {
          ...
        },
        "normalizer": {             <-- add this
          "lowersort": {
            "type": "custom",
            "filter": [
              "lowercase"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "_doc": {
      "properties": {
        ...
        "lastName": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "normalizer": "lowersort",   <-- add this
              "ignore_above": 256
            }
          }
        },
        ...
      }
    }
  }
}