在marklogic中获得确切总搜索结果的最佳方法

时间:2016-07-26 19:33:28

标签: marklogic marklogic-8

当我尝试使用search:estimate获取总搜索结果数时,得到的结果是错误的。当我尝试从search:search的返回结果中解析总数时,得到的总数同样不正确,或者在不同的分页之间得到不同的总数。

如何获取搜索字符串的确切计数?

--- XXXX编辑问题------

我的数据库由JSON文档组成,这些JSON文档在结构上是分层的。例如:以下是样本,我保留在帖子的末尾..抱歉粘贴我的整个JSON结构,但我认为你明白了..

我在某些元素上创建了Field / Field Range索引(字段名称及其对应路径如下):
concept_species /species
concept_name    /name
concept_registrar   /registrar/name
concept_scientist  /scientist/name
concept_supplier    /suppliers/name
concept_entitySubType   /entitySubType
concept_entityType  /entityType
concept_createdDate /createdDate
concept_project /project/name
concept_moniker /moniker

当我的搜索中包含上述任意一个字段作为“约束”时,xdmp:estimate的结果是正确的......但是当我的搜索字符串中没有任何这些约束时,xdmp:estimate的结果就不准确了..不过我的搜索结果本身是正确的..所有索引看起来都没有问题?为什么会这样??因此我改回使用fn:count来获取总搜索结果数。

这可能与这个问题无关,但为了完整性,我补充说明一下..我创建了一个自定义约束,它基本上接收约束并将其转换为JSON中的路径。例如:假设用户想要搜索名称为“ATCC”的供应商..我没有让用户输入整个路径,而是创建了一个自定义约束,用户按照JSON结构书写(用点号分隔属性名),我的约束(constraint)再将其转换为实际的JSON路径..所以在这种情况下,搜索字符串将如下所示:((concept:suppliers.name:(ATCC))),我的自定义约束concept将其转换为以下cts:query

<cts:json-property-scope-query xmlns:cts="http://marklogic.com/cts">
  <cts:property>suppliers</cts:property>
  <cts:json-property-scope-query>
    <cts:property>name</cts:property>
    <cts:word-query>
      <cts:text xml:lang="en">ATCC</cts:text>
      <cts:option>case-insensitive</cts:option>
      <cts:option>punctuation-insensitive</cts:option>
      <cts:option>whitespace-insensitive</cts:option>
      <cts:option>wildcarded</cts:option>
    </cts:word-query>
  </cts:json-property-scope-query>
</cts:json-property-scope-query>

这是我的JSON文档结构

{
    "moniker": "",
    "entityType": "",
    "entitySubType": "",
    "abbvNumber": "",
    "bioSafetyLevel": "",
    "name": "",
    "extCorpID": "",
    "extLotID": "",
    "selectAgent": "",
    "comments": "",
      "nucleotideSeq": {
        "seq": ""
      },
      "chains": [
        {

          "chainType": "",
          "name": "",
          "plasmidLotID": "",
          "stochiometry": 0,
          "aminoAcids": [
            {
              "sequence": "",
              "predictedMatureSeqs": [
                {
                  "encodedChainName": "",
                  "encodedChainType": "",
                  "sequence": "",
                  "domains": [
                    {
                      "allotype": "",
                      "domainType": "",
                      "entrezgeneID": "",
                      "geneSymbol": "",
                      "heavyChainIsoType": "",
                      "lightChainIsoType": "",
                      "name": "",
                      "regonizedAntigenFK": "",
                      "species": "",
                      "heavyChainIsoTypeMutation": "",
                      "antigens": [
                        {

                          "antiIdiotypeType": "",
                          "antibodyAntigen": "",
                          "corporateID": "",
                          "description": "",
                          "entrezgeneID": "",
                          "geneSymbol": "",
                          "name": "",
                          "relatedProtein": "",
                          "sequence": "",
                          "species": "",
                          "type": "",
                          "externalID": ""
                        }
                      ]
                    }
                  ]
                }
              ],
              "domains": [
                {
                  "allotype": "",
                  "domainType": "",
                  "entrezgeneID": "",
                  "geneSymbol": "",
                  "heavyChainIsoType": "",
                  "lightChainIsoType": "",
                  "name": "",
                  "regonizedAntigenFK": "",
                  "species": "",
                  "heavyChainIsoTypeMutation": "",
                  "antigens": [
                    {

                      "antiIdiotypeType": "",
                      "antibodyAntigen": "",
                      "corporateID": "",
                      "description": "",
                      "entrezgeneID": "",
                      "geneSymbol": "",
                      "name": "",
                      "relatedProtein": "",
                      "sequence": "",
                      "species": "",
                      "type": "",
                      "externalID": ""
                    }
                  ]
                }
              ]
            }
          ],
          "constructs": [
            {
              "plasmidID": "",
              "precursorAminoAcidSeq": ""
            }
          ]
        }
      ],
      "supplier": {
        "name": "",
        "productID": "",
        "atccCatalogNumber": "",
        "lotID": ""
      },
      "preparation": {
        "type": "",
        "lotIDs": [
          ""
        ],
        "amminoAcidDerivatization": "",
        "chemicalConjugations": [
          {
            "name": "",
            "dar": ""
          }
        ],
        "peptidateTreatment": "",
        "proteinTreatment": "",
        "purification": "",
        "expressionSystem": "",
        "empty": false
      }
    },
    "project": {

      "name": "",
      "status": ""
    },
    "registrar": {
      "username": "",
      "email": "",
      "name": "",
      "upi": "",
      "admin": false,
      "curator": false,
      "approvedUser": false
    },
    "scientist": {
      "username": "",
      "email": "",
      "name": "",
      "upi": "",
      "admin": false,
      "curator": false,
      "approvedUser": false
    },
    "notebook": {

      "elnPage": "",
      "upi": "",
      "location": "",
      "subpage": ""
    },
    "growthFS": {

      "mediumUsed": "",
      "otherComponents": "",
      "percentCO2": 0,
      "percentHumudity": 0,
      "percentSerum": 0,
      "selectionMarker": "",
      "spinnerPlateSpeed": 0,
      "temp": 0,
      "drugResistance": "",
      "growthConditions": "",
      "passageNumber": ""
    },
    "origin": {

      "dateOfTransfection": "",
      "hcAntibodyIsotype": "",
      "lcAntibodyIsotype": "",
      "parentCellLineLotID": "",
      "parentChildRel": "",
      "parentTissueSpecies": "",
      "strain": "",
      "tissueSource": "",
      "celllineMemID": "",
      "dateFrozen": "",
      "strFingerprint": "",
      "plasmidLotIDs": [
        ""
      ]
    },
    "miscellaneous": {

      "expHostType": "",
      "selEukaryote": "",
      "selProkaryote": "",
      "buffer": "",
      "enotoxinLevel": "",
      "enotoxinUnit": "",
      "enotoxinMethod": "",
      "concentrationLevel": "",
      "concentrationUnit": "",
      "concentrationMethod": "",
      "mixture": "",
      "proteinMw": 0
    },
    "nucleotideSeq": {
      "seq": ""
    },
    "preparation": {

      "type": "",
      "lotIDs": [
        ""
      ],
      "amminoAcidDerivatization": "",
      "chemicalConjugations": [
        {
          "name": "",
          "dar": ""
        }
      ],
      "peptidateTreatment": "",
      "proteinTreatment": "",
      "purification": "",
      "expressionSystem": "",
      "empty": false
    },
    "adc": {

      "dars": [
        {
          "value": 0,
          "method": "",
          "precision": "",
          "empty": false
        }
      ],
      "aggregations": [
        {
          "percentAggMethod": "",
          "percentAggValue": 0
        }
      ]
    },
    "createdBy": "",
    "createdDate": "",
    "modifiedBy": "",
    "modifiedDate": "",
    "alternateName": "",
    "chains": [
      {

        "chainType": "",
        "name": "",
        "plasmidLotID": "",
        "stochiometry": 0,
        "aminoAcids": [
          {
            "sequence": "",
            "predictedMatureSeqs": [
              {

                "avgMolWt": 0,
                "encodedChainName": "",
                "encodedChainType": "",
                "length": 0,
                "sequence": "",
                "domains": [
                  {

                    "allotype": "",
                    "domainType": "",
                    "domainEnd": 0,
                    "entrezgeneID": "",
                    "geneSymbol": "",
                    "heavyChainIsoType": "",
                    "lightChainIsoType": "",
                    "name": "",
                    "regonizedAntigenFK": "",
                    "species": "",
                    "domainStart": 0,
                    "heavyChainIsoTypeMutation": "",
                    "antigens": [
                      {

                        "antiIdiotypeType": "",
                        "antibodyAntigen": "",
                        "corporateID": "",
                        "description": "",
                        "entrezgeneID": "",
                        "geneSymbol": "",
                        "name": "",
                        "relatedProtein": "",
                        "sequence": "",
                        "species": "",
                        "type": "",
                        "externalID": ""
                      }
                    ]
                  }
                ]
              }
            ],
            "domains": [
              {

                "allotype": "",
                "domainType": "",
                "domainEnd": 0,
                "entrezgeneID": "",
                "geneSymbol": "",
                "heavyChainIsoType": "",
                "lightChainIsoType": "",
                "name": "",
                "regonizedAntigenFK": "",
                "species": "",
                "domainStart": 0,
                "heavyChainIsoTypeMutation": "",
                "antigens": [
                  {

                    "antiIdiotypeType": "",
                    "antibodyAntigen": "",
                    "corporateID": "",
                    "description": "",
                    "entrezgeneID": "",
                    "geneSymbol": "",
                    "name": "",
                    "relatedProtein": "",
                    "sequence": "",
                    "species": "",
                    "type": "",
                    "externalID": ""
                  }
                ]
              }
            ]
          }
        ],
        "constructs": [
          {
            "plasmidID": "",
            "precursorAminoAcidSeq": ""
          }
        ]
      }
    ],
    "orfs": [
      {

        "orfEnd": 0,
        "intronsPresent": "",
        "orfStart": 0,
        "promoters": [
          ""
        ],
        "aminoAcids": [
          {
            "sequence": "",
            "predictedMatureSeqs": [
              {
                "encodedChainName": "",
                "encodedChainType": "",
                "length": 0,
                "sequence": "",
                "domains": [
                  {

                    "allotype": "",
                    "domainType": "",
                    "domainEnd": 0,
                    "entrezgeneID": "",
                    "geneSymbol": "",
                    "heavyChainIsoType": "",
                    "lightChainIsoType": "",
                    "name": "",
                    "regonizedAntigenFK": "",
                    "species": "",
                    "domainStart": 0,
                    "heavyChainIsoTypeMutation": "",
                    "antigens": [
                      {

                        "antiIdiotypeType": "",
                        "antibodyAntigen": "",
                        "corporateID": "",
                        "description": "",
                        "entrezgeneID": "",
                        "geneSymbol": "",
                        "name": "",
                        "relatedProtein": "",
                        "sequence": "",
                        "species": "",
                        "type": "",
                        "externalID": ""
                      }
                    ]
                  }
                ]
              }
            ],
            "domains": [
              {
                "allotype": "",
                "domainType": "",
                "domainEnd": 0,
                "entrezgeneID": "",
                "geneSymbol": "",
                "heavyChainIsoType": "",
                "lightChainIsoType": "",
                "name": "",
                "regonizedAntigenFK": "",
                "species": "",
                "domainStart": 0,
                "heavyChainIsoTypeMutation": "",
                "antigens": [
                  {

                    "antiIdiotypeType": "",
                    "antibodyAntigen": "",
                    "corporateID": "",
                    "description": "",
                    "entrezgeneID": "",
                    "geneSymbol": "",
                    "name": "",
                    "relatedProtein": "",
                    "sequence": "",
                    "species": "",
                    "type": "",
                    "externalID": ""
                  }
                ]
              }
            ]
          }
        ],
        "ncSeq": {

          "seq": ""
        },
        "label": "",
        "note": ""
      }
    ],
    "antigens": [
      {

        "antiIdiotypeType": "",
        "antibodyAntigen": "",
        "corporateID": "",
        "description": "",
        "entrezgeneID": "",
        "geneSymbol": "",
        "name": "",
        "relatedProtein": "",
        "sequence": "",
        "species": "",
        "type": "",
        "externalID": ""
      }
    ],
    "immunogens": [
      {

        "type": "",
        "name": "",
        "entrezgeneID": "",
        "geneSymbol": "",
        "corporateID": "",
        "species": "",
        "lotID": "",
        "sequence": ""
      }
    ],
    "suppliers": [
      {

        "name": "",
        "productID": "",
        "atccCatalogNumber": "",
        "lotID": ""
      }
    ],
    "domains": [
      {

        "allotype": "",
        "domainType": "",
        "domainEnd": 0,
        "entrezgeneID": "",
        "geneSymbol": "",
        "heavyChainIsoType": "",
        "lightChainIsoType": "",
        "name": "",
        "regonizedAntigenFK": "",
        "species": "",
        "domainStart": 0,
        "heavyChainIsoTypeMutation": "",
        "antigens": [
          {

            "antiIdiotypeType": "",
            "antibodyAntigen": "",
            "corporateID": "",
            "description": "",
            "entrezgeneID": "",
            "geneSymbol": "",
            "name": "",
            "relatedProtein": "",
            "sequence": "",
            "species": "",
            "type": "",
            "externalID": ""
          }
        ]
      }
    ]
 }

4 个答案:

答案 0 :(得分:1)

Sam Mefford在他的评论中提供了更好的答案 - “请记住过滤(filtered)搜索与未过滤(unfiltered)搜索的区别。如果您的查询和所有索引都正确,从而可以运行未过滤的搜索,那么您的查询将运行得更快,并且我相信search:estimate和search:search返回的总数都将是准确的。”

fn:count()永远不是最优的。它只适用于计算较小的序列、文档集、结果集等。过滤搜索也明显慢于未过滤搜索。如果您调整好索引,就可以使用未过滤的搜索,并从search:estimate、xdmp:estimate以及search:search的分页结果中得到精确的计数。

答案 1 :(得分:0)

这完全与片段(fragment)有关:您看到的数字是基于片段的估计值。如果结果不符合您的预期,有一些可选方案(更改文档结构、设置片段根/片段父节点、使用过滤搜索等)..不过,正如wst提到的 - 请给出一个例子,这样人们才能给您更直接的指导..

答案 2 :(得分:0)

我付出了一些性能代价,但能够使用fn:count解决这个问题。

我正在使用search:search自定义约束,所以在我的情况下我所要做的就是以下

fn:count(cts:search(fn:doc(), cts:query(search:parse($q, $options))))

答案 3 :(得分:0)

如果您没有定义任何片段策略(fragmentation strategy),xdmp:estimate应该给出正确的结果。它会比fn:count快得多。您可以这样重写代码 -

  

xdmp:estimate(cts:search(fn:doc(), cts:query(search:parse($q, $options))))