在solr中强制输出概率结果

时间:2016-10-24 08:17:26

标签: solr solrj solr4

我试图在solr中强制输出概率结果,即页面结果中有x%(比如k/n)具有属性A。我不确定是否有直接的方法来做到这一点。

我正在尝试查询重排序(re-rank)的方法:在第一个solr查询中对具有属性A的文档进行加权提升(boost),这应确保至少有k个具有属性A的文档位于顶部。然后我想运行另一个查询,只从中选取k个元素,其余的n-k个元素按正常得分顺序选取,最后返回这些元素按得分排序的结果。

然而,我遇到的问题是:如果属性A非常常见,那么第一个查询最终可能会带回过多具有属性A的文档,因此我无法确定应该为第二个查询选取多少个文档。

我还是solr的新手,希望只是我把这个问题想得太复杂了。尽管如此,你们中有没有人遇到过与solr类似的问题,并为此找到了一个低延迟的解决方案?

1 个答案:

答案 0 :(得分:0)

如果我对你的所有要求理解正确,我想加权搜索可以解决问题。如果您将我在下面创建的示例数据加载到演示核心中,并运行edismax搜索,将相关性(relevance)的权重设为0.8、新鲜度(freshness)的权重设为0.2,那么既相关又新鲜的结果将显示在顶部。(原文此处有一张截图)

搜索网址如下:http://<solr_server>.us-west-2.compute.amazonaws.com:8983/solr/gettingstarted/select?defType=edismax&indent=on&q=*:*&qf=relevance^0.8%20AND%20freshness^0.2&rows=10&start=0&wt=json

请注意我将结果集限制在前十位。

包含响应头和结果的响应如下所示:

{
  "responseHeader":{
    "status":0,
    "QTime":6,
    "params":{
      "q":"*:*",
      "defType":"edismax",
      "indent":"on",
      "qf":"relevance^0.8 AND freshness^0.2",
      "start":"0",
      "rows":"10",
      "wt":"json",
      "_":"1477363414109"}},
  "response":{"numFound":35,"start":0,"docs":[
      {
        "id":"1",
        "name":["JBL Professional Audio"],
        "relevance":[true],
        "freshness":[true],
        "_version_":1549127868117680128},
      {
        "id":"2",
        "name":["Versatile Multilayer Disc"],
        "relevance":[true],
        "freshness":[true],
        "_version_":1549127868288598016},
      {
        "id":"3",
        "name":["Key finder"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868290695168},
      {
        "id":"4",
        "name":["MiniDisc"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868291743744},
      {
        "id":"5",
        "name":["Entertainment center"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868292792320},
      {
        "id":"6",
        "name":["TV gateway"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868294889472},
      {
        "id":"7",
        "name":["Da-Lite Lecterns, Podiums, Racks"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868295938048},
      {
        "id":"8",
        "name":["American Dynamics CCTV Products"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868301180928},
      {
        "id":"9",
        "name":["K-box"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868302229504},
      {
        "id":"10",
        "name":["Wife acceptance factor"],
        "relevance":[true],
        "freshness":[false],
        "_version_":1549127868303278080}]
  }}

您可以从Documents加载的示例数据集如下:

<add>
<doc>
<field name="id">1</field>
<field name="name">JBL Professional Audio</field>
<field name="relevance">true</field>
<field name="freshness">true</field>
</doc>
<doc>
<field name="id">2</field>
<field name="name">Versatile Multilayer Disc</field>
<field name="relevance">true</field>
<field name="freshness">true</field>
</doc>
<doc>
<field name="id">3</field>
<field name="name">Key finder</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">4</field>
<field name="name">MiniDisc</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">5</field>
<field name="name">Entertainment center</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">6</field>
<field name="name">TV gateway</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">7</field>
<field name="name">Da-Lite Lecterns, Podiums, Racks</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">8</field>
<field name="name">American Dynamics CCTV Products</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">9</field>
<field name="name">K-box</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">10</field>
<field name="name">Wife acceptance factor</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">11</field>
<field name="name">J</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">12</field>
<field name="name">Location awareness</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">13</field>
<field name="name">Hybrid Broadcast Broadband TV</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">14</field>
<field name="name">Comparison of digital media players</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">15</field>
<field name="name">Coupon-eligible converter box</field>
<field name="relevance">true</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">16</field>
<field name="name">Crown Audio</field>
<field name="relevance">false</field>
<field name="freshness">true</field>
</doc>
<doc>
<field name="id">17</field>
<field name="name">Q</field>
<field name="relevance">false</field>
<field name="freshness">true</field>
</doc>
<doc>
<field name="id">18</field>
<field name="name">Surround sound</field>
<field name="relevance">false</field>
<field name="freshness">true</field>
</doc>
<doc>
<field name="id">19</field>
<field name="name">Digital Living Network Alliance</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">20</field>
<field name="name">EcoCute</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">21</field>
<field name="name">TV gateway</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">22</field>
<field name="name">List of smart TV platforms and middleware software</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">23</field>
<field name="name">Predicta</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">24</field>
<field name="name">AMX Control Systems</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">25</field>
<field name="name">Batteriser</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">26</field>
<field name="name">Audio equipment</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">27</field>
<field name="name">EcoCute</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">28</field>
<field name="name">Elmo Document Cameras</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">29</field>
<field name="name">Lab Gruppen Audio</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">30</field>
<field name="name">Direct-drive turntable</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">31</field>
<field name="name">Wearable Technology Show</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">32</field>
<field name="name">Power cord</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">33</field>
<field name="name">MiniDisc</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">34</field>
<field name="name">Quattron</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
<doc>
<field name="id">35</field>
<field name="name">I</field>
<field name="relevance">false</field>
<field name="freshness">false</field>
</doc>
</add>

这是你想要的效果吗?