As far as I know, omitNorms is set to false by default, but somehow it doesn't affect the score at all. Every matching document gets exactly the same score.
{
"MediaOutletName":"Guardian Money",
"score":6.101774},
{
"MediaOutletName":"The Guardian",
"score":6.101774},
{
"MediaOutletName":"Farmers Guardian",
"score":6.101774},
{
"MediaOutletName":"Guardian Online",
"score":6.101774},
{
"MediaOutletName":"Thames Guardian",
"score":6.101774},
In this example, The Guardian should rank at the top and score higher than the others.
Field type and filters:
<fieldType name="text_general" class="solr.TextField" omitNorms="false" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="0" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="1" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/>
</analyzer>
</fieldType>
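To see how this chain actually tokenizes the outlet names, one option is Solr's field analysis handler. This is only a minimal sketch, assuming a local Solr on localhost:8983 and a core named media (both placeholders, not part of the original setup):

import requests

# Hypothetical host and core name; replace with your own.
SOLR = "http://localhost:8983/solr/media"

# Ask the field analysis handler to run "The Guardian" through the
# text_general analyzers and report the tokens emitted by each filter.
resp = requests.get(f"{SOLR}/analysis/field", params={
    "analysis.fieldtype": "text_general",
    "analysis.fieldvalue": "The Guardian",
    "wt": "json",
})

# Inspect the per-filter token output under
# analysis -> field_types -> text_general -> index.
print(resp.json())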
Field definition:
<field name="MediaOutletName" omitNorms="false" type="text_general" multiValued="false" indexed="true" stored="true"/>
Debug query:
{
"responseHeader":{
"status":0,
"QTime":0,
"params":{
"q":"MediaOutletName:Guardian",
"indent":"on",
"fl":"MediaOutletName_s,score",
"omit":"undefined",
"wt":"json",
"debugQuery":"on",
"_":"1471275424357"}},
"response":{"numFound":55,"start":0,"maxScore":6.101774,"docs":[
{
"MediaOutletName_s":"Guardian Money",
"score":6.101774},
{
"MediaOutletName_s":"The Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Farmers Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Guardian Online",
"score":6.101774},
{
"MediaOutletName_s":"Thames Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Nenagh Guardian",
"score":6.101774},
{
"MediaOutletName_s":"News Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Gorey Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Cornish Guardian",
"score":6.101774},
{
"MediaOutletName_s":"Somerset Guardian",
"score":6.101774}]
},
"debug":{
"rawquerystring":"MediaOutletName:Guardian",
"querystring":"MediaOutletName:Guardian",
"parsedquery":"MediaOutletName:guardian",
"parsedquery_toString":"MediaOutletName:guardian",
"explain":{
"301":"\n6.101774 = weight(MediaOutletName:guardian in 110) [], result of:\n 6.101774 = score(doc=110,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"332":"\n6.101774 = weight(MediaOutletName:guardian in 125) [], result of:\n 6.101774 = score(doc=125,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"4897":"\n6.101774 = weight(MediaOutletName:guardian in 1016) [], result of:\n 6.101774 = score(doc=1016,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"6923":"\n6.101774 = weight(MediaOutletName:guardian in 2270) [], result of:\n 6.101774 = score(doc=2270,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"8553":"\n6.101774 = weight(MediaOutletName:guardian in 2970) [], result of:\n 6.101774 = score(doc=2970,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"8680":"\n6.101774 = weight(MediaOutletName:guardian in 3045) [], result of:\n 6.101774 = score(doc=3045,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"8686":"\n6.101774 = weight(MediaOutletName:guardian in 3049) [], result of:\n 6.101774 = score(doc=3049,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"8961":"\n6.101774 = weight(MediaOutletName:guardian in 3203) [], result of:\n 6.101774 = score(doc=3203,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"9253":"\n6.101774 = weight(MediaOutletName:guardian in 3396) [], result of:\n 6.101774 = score(doc=3396,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n",
"9344":"\n6.101774 = weight(MediaOutletName:guardian in 3448) [], result of:\n 6.101774 = score(doc=3448,freq=1.0 = termFreq=1.0\n), product of:\n 5.4649 = idf(docFreq=55, docCount=13111)\n 1.116539 = tfNorm, computed from:\n 1.0 = termFreq=1.0\n 1.2 = parameter k1\n 0.75 = parameter b\n 3.436885 = avgFieldLength\n 2.56 = fieldLength\n"},
"QParser":"LuceneQParser",
"timing":{
"time":0.0,
"prepare":{
"time":0.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"facet_module":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":0.0}},
"process":{
"time":0.0,
"query":{
"time":0.0},
"facet":{
"time":0.0},
"facet_module":{
"time":0.0},
"mlt":{
"time":0.0},
"highlight":{
"time":0.0},
"stats":{
"time":0.0},
"expand":{
"time":0.0},
"debug":{
"time":0.0}}}}}
Thanks,
Answer (score: 1)
When people talk about field length in Lucene/Solr/Elasticsearch, they mean the number of tokens in the field, not the number of bytes. All of your examples produce the same number of tokens (two), so the length normalization that gets applied yields the same score for each of them.
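For reference, here is the BM25 arithmetic from your explain output, reproduced in a small Python sketch (the constants are copied straight from the debug response); since freq, idf and the encoded fieldLength are identical for every matching document, the product is identical too:

import math

# Constants copied from the debugQuery explain output above.
k1, b = 1.2, 0.75
freq = 1.0
avg_field_length = 3.436885
field_length = 2.56            # same lossy-encoded norm for every matching doc
doc_freq, doc_count = 55, 13111

idf = math.log(1 + (doc_count - doc_freq + 0.5) / (doc_freq + 0.5))
tf_norm = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * field_length / avg_field_length))

print(idf)            # ~5.4649
print(tf_norm)        # ~1.116539
print(idf * tf_norm)  # ~6.101774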
If the field is single-valued, you could also index the length of the field (in bytes or characters) as an integer and use it as a tie-breaker when sorting. I don't think there is a function query that gives you the length of a field's original value at query time (at least there didn't use to be one).
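A rough sketch of that tie-breaker idea, not your actual setup: the MediaOutletNameLength field, the core name and the ids are all made up here, and the schema would need a matching int/pint field for the length.

import requests

SOLR = "http://localhost:8983/solr/media"   # hypothetical core name

# Index the name length alongside each document so it can break ties later.
docs = [
    {"id": "1", "MediaOutletName": "The Guardian",
     "MediaOutletNameLength": len("The Guardian")},
    {"id": "2", "MediaOutletName": "Farmers Guardian",
     "MediaOutletNameLength": len("Farmers Guardian")},
]
requests.post(f"{SOLR}/update?commit=true", json=docs)

# Sort by relevance first; when the BM25 scores are equal, shorter names win.
params = {
    "q": "MediaOutletName:Guardian",
    "sort": "score desc, MediaOutletNameLength asc",
    "fl": "MediaOutletName,score",
    "wt": "json",
}
print(requests.get(f"{SOLR}/select", params=params).json())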