MongoDB:在文本搜索与时间戳的复合索引上执行文本搜索查询

时间:2020-05-05 14:04:46

标签: mongodb mongodb-query full-text-search query-optimization compound-index

我有一个集合(collection),总共有大约6500万条这样的记录

    {
    "_id" : ObjectId("5e0b814660da38d499ecf178"),
    "brands" : null,
    "client_id" : null,
    "code_co_owner" : ",7359562, ",
    "code_segment" : "7359562",
    "core" : "",
    "created" : "01-01-2020",
    "created_full" : "01-01-2020 00:00:27",
    "created_int" : NumberLong(1577811627),
    "email" : ",phamthanhlam17_gmail_com, "
.....
}

我在 (email, created_int) 上创建了一个复合索引:{"email": "text", "created_int": -1},用于搜索并筛选 created_int 范围内的名称,但我发现搜索性能很差。

我尝试对该查询使用 explain:

    db.getCollection('profile_20201').explain().find({"$text":{"$search":"phamthanhlam17_gmail_com"},
"created_int":{"$lte":1585627013, "$gte":1583035013}}).count()

explain 的结果是:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [ 
                {
                    "created_int" : {
                        "$lte" : 1585627013.0
                    }
                }, 
                {
                    "created_int" : {
                        "$gte" : 1583035013.0
                    }
                }, 
                {
                    "$text" : {
                        "$search" : "phamthanhlam17_gmail_com",
                        "$language" : "english",
                        "$caseSensitive" : false,
                        "$diacriticSensitive" : false
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "TEXT",
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {}
                            }
                        }
                    }
                }
            }
        },
        "rejectedPlans" : []
    },
    "serverInfo" : {
    },
    "ok" : 1.0
}

explain 的执行统计信息(executionStats):

    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "namespace",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [ 
                {
                    "created_int" : {
                        "$lte" : 1585627013.0
                    }
                }, 
                {
                    "created_int" : {
                        "$gte" : 1583035013.0
                    }
                }, 
                {
                    "$text" : {
                        "$search" : "phamthanhlam17_gmail_com",
                        "$language" : "english",
                        "$caseSensitive" : false,
                        "$diacriticSensitive" : false
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "TEXT",
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {}
                            }
                        }
                    }
                }
            }
        },
        "rejectedPlans" : []
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 1499057,
        "totalKeysExamined" : 72544123,
        "totalDocsExamined" : 39448083,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 1483861,
            "works" : 72544124,
            "advanced" : 0,
            "needTime" : 72544123,
            "needYield" : 0,
            "saveState" : 578233,
            "restoreState" : 578233,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 39448083,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "TEXT",
                "nReturned" : 39448083,
                "executionTimeMillisEstimate" : 1475831,
                "works" : 72544124,
                "advanced" : 39448083,
                "needTime" : 33096040,
                "needYield" : 0,
                "saveState" : 578233,
                "restoreState" : 578233,
                "isEOF" : 1,
                "invalidates" : 0,
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "nReturned" : 39448083,
                    "executionTimeMillisEstimate" : 1473041,
                    "works" : 72544124,
                    "advanced" : 39448083,
                    "needTime" : 33096040,
                    "needYield" : 0,
                    "saveState" : 578233,
                    "restoreState" : 578233,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "docsRejected" : 0,
                    "inputStage" : {
                        "stage" : "FETCH",
                        "nReturned" : 39448083,
                        "executionTimeMillisEstimate" : 1465951,
                        "works" : 72544124,
                        "advanced" : 39448083,
                        "needTime" : 33096040,
                        "needYield" : 0,
                        "saveState" : 578233,
                        "restoreState" : 578233,
                        "isEOF" : 1,
                        "invalidates" : 0,
                        "docsExamined" : 39448083,
                        "alreadyHasObj" : 0,
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "nReturned" : 39448083,
                            "executionTimeMillisEstimate" : 439664,
                            "works" : 72544124,
                            "advanced" : 39448083,
                            "needTime" : 33096040,
                            "needYield" : 0,
                            "saveState" : 578233,
                            "restoreState" : 578233,
                            "isEOF" : 1,
                            "invalidates" : 0,
                            "dupsTested" : 72544123,
                            "dupsDropped" : 0,
                            "recordIdsForgotten" : 0,
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "nReturned" : 72544123,
                                "executionTimeMillisEstimate" : 291188,
                                "works" : 72544124,
                                "advanced" : 72544123,
                                "needTime" : 0,
                                "needYield" : 0,
                                "saveState" : 578233,
                                "restoreState" : 578233,
                                "isEOF" : 1,
                                "invalidates" : 0,
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {},
                                "keysExamined" : 72544123,
                                "seeks" : 1,
                                "dupsTested" : 72544123,
                                "dupsDropped" : 0,
                                "seenInvalidated" : 0
                            }
                        }
                    }
                }
            }
        }
    },
    "serverInfo" : {
    },
    "ok" : 1.0
}

So, does the index cover the query?

Or which index will give me better performance for this problem?

Thank you.

1 个答案:

答案 0 :(得分:1)

看来您创建的是一个包含 text 键的复合索引。不过,MongoDB 官方文档中指出:

复合索引可以包含文本索引键和升/降索引键。但是,这些复合索引具有以下限制:

  • 复合文本索引不能包含任何其他特殊索引类型,例如多键或地理空间索引字段。
  • 如果复合文本索引在文本索引键之前包含键,则要执行 $text 搜索,查询谓词必须在前面的键上包含相等匹配条件。 (您正在此处使用范围查询)
  • 创建复合文本索引时,所有文本索引键必须在索引规范文档中相邻列出。

所以,这是第一个问题。


接下来,我希望您看看索引前缀(prefixes),它将帮助您了解查询是如何使用复合索引的。

希望这可以帮助您理解问题:)