MongoDB COLSCAN比IDXSCAN更快

时间:2015-11-06 06:41:45

标签: performance mongodb mongodb-query aggregation-framework

我有一个这样的集合,大小约为700MB,包含约100万个文档

{
"_id" : "0455923b34b3",
"identity" : [
    {
        "currentIdentity" : "90000000",
        "identityType" : "ONE",
        "identityHistory" : [
            {
                "identity" : "996969999",
                "type" : "ONE",
                "from" : "2014-12-14 00:50:06.971",
                "to" : "2014-03-14 20:32:33.982"
            },
            {
                "identity" : "9969898899",
                "type" : "TWO",
                "from" : "2014-12-14 00:50:06.971",
                "to" : "2014-03-14 20:32:33.982"
            }
        ]
    }
]

}

当我尝试像这样查询时

  db.customer.aggregate( [     
        {$match:
            { "identity.identityHistory.from" : {$lt:"2014-09-13 14:18:29.616"} , 
              "identity.identityHistory.to" :{$gt:"2014-08-30 09:26:24.842"}  }  },
            {$unwind : "$identity"},  
            {$unwind:"$identity.identityHistory"}, 
            {$project:{"identity.currentIdentity":1,"identity.identityHistory.identity" : 1 } }
        ])

我使用性能分析(profiling)来监控性能,结果如下:

1.没有创建索引(colScan),花了526ms

2.在$match字段上使用复合索引,花了29092ms

db.customer.createIndex({"identity.identityHistory.from":1,"identity.identityHistory.to":1})

编辑:

a) RAM:8GB
b) Available RAM:4.3 GB
c) db.stats()
 {
 "db" : "demomodule",
 "collections" : 4,
 "objects" : 1200038,
 "avgObjSize" : 584.2189480666445,
 "dataSize" : 701084938,
 "storageSize" : 179236864,
 "numExtents" : 0,
 "indexes" : 5,
 "indexSize" : 31248384,
 "ok" : 1
 }

d) db.customer.stats()
 {
 "ns" : "demomodule.customer",
 "count" : 999998,
 "size" : 686006200,
 "avgObjSize" : 686,
 "storageSize" : 174927872,
 "capped" : false,
 "wiredTiger" : {
    "metadata" : {
        "formatVersion" : 1
    },
    "creationString" : "allocation_size=4KB,app_metadata= (formatVersion=1),block_allocation=best,block_compressor=snappy,cache_resident=0,checkpoint=(WiredTigerCheckpoint.3=(addr=\"01e2868f81e4cad3e2b5e2869081e4c26e4411e2869181e4d9f84752808080e40a6d0fc0e40a6c5fc0\",order=3,time=1446722125,size=174882816,write_gen=21416)),checkpoint_lsn=(85,95479808),checksum=on,collator=,columns=,dictionary=0,format=btree,huffman_key=,huffman_value=,id=63,internal_item_max=0,internal_key_max=0,internal_key_truncate=,internal_page_max=4KB,key_format=q,key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=64MB,memory_page_max=10m,os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,prefix_compression_min=4,split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,value_format=u,version=(major=1,minor=1)",
    "type" : "file",
    "uri" : "statistics:table:collection-4--2760010649195552578",
    "LSM" : {
        "bloom filters in the LSM tree" : 0,
        "bloom filter false positives" : 0,
        "bloom filter hits" : 0,
        "bloom filter misses" : 0,
        "bloom filter pages evicted from cache" : 0,
        "bloom filter pages read into cache" : 0,
        "total size of bloom filters" : 0,
        "sleep for LSM checkpoint throttle" : 0,
        "chunks in the LSM tree" : 0,
        "highest merge generation in the LSM tree" : 0,
        "queries that could have benefited from a Bloom filter that did not exist" : 0,
        "sleep for LSM merge throttle" : 0
    },
    "block-manager" : {
        "file allocation unit size" : 4096,
        "blocks allocated" : 0,
        "checkpoint size" : 174882816,
        "allocations requiring file extension" : 0,
        "blocks freed" : 0,
        "file magic number" : 120897,
        "file major version number" : 1,
        "minor version number" : 0,
        "file bytes available for reuse" : 40960,
        "file size in bytes" : 174927872
    },
    "btree" : {
        "btree checkpoint generation" : 104,
        "column-store variable-size deleted values" : 0,
        "column-store fixed-size leaf pages" : 0,
        "column-store internal pages" : 0,
        "column-store variable-size leaf pages" : 0,
        "pages rewritten by compaction" : 0,
        "number of key/value pairs" : 0,
        "fixed-record size" : 0,
        "maximum tree depth" : 4,
        "maximum internal page key size" : 368,
        "maximum internal page size" : 4096,
        "maximum leaf page key size" : 3276,
        "maximum leaf page size" : 32768,
        "maximum leaf page value size" : 67108864,
        "overflow pages" : 0,
        "row-store internal pages" : 0,
        "row-store leaf pages" : 0
    },
    "cache" : {
        "bytes read into cache" : 695116039,
        "bytes written from cache" : 0,
        "checkpoint blocked page eviction" : 0,
        "unmodified pages evicted" : 0,
        "page split during eviction deepened the tree" : 0,
        "modified pages evicted" : 0,
        "data source pages selected for eviction unable to be evicted" : 0,
        "hazard pointer blocked page eviction" : 0,
        "internal pages evicted" : 0,
        "pages split during eviction" : 0,
        "in-memory page splits" : 0,
        "overflow values cached in memory" : 0,
        "pages read into cache" : 21401,
        "overflow pages read into cache" : 0,
        "pages written from cache" : 0
    },
    "compression" : {
        "raw compression call failed, no additional data available" : 0,
        "raw compression call failed, additional data available" : 0,
        "raw compression call succeeded" : 0,
        "compressed pages read" : 21306,
        "compressed pages written" : 0,
        "page written failed to compress" : 0,
        "page written was too small to compress" : 0
    },
    "cursor" : {
        "create calls" : 11,
        "insert calls" : 0,
        "bulk-loaded cursor-insert calls" : 0,
        "cursor-insert key and value bytes inserted" : 0,
        "next calls" : 3018525,
        "prev calls" : 1,
        "remove calls" : 0,
        "cursor-remove key bytes removed" : 0,
        "reset calls" : 15650,
        "search calls" : 15492,
        "search near calls" : 142,
        "update calls" : 0,
        "cursor-update value bytes updated" : 0
    },
    "reconciliation" : {
        "dictionary matches" : 0,
        "internal page multi-block writes" : 0,
        "leaf page multi-block writes" : 0,
        "maximum blocks required for a page" : 0,
        "internal-page overflow keys" : 0,
        "leaf-page overflow keys" : 0,
        "overflow values written" : 0,
        "pages deleted" : 0,
        "page checksum matches" : 0,
        "page reconciliation calls" : 0,
        "page reconciliation calls for eviction" : 0,
        "leaf page key bytes discarded using prefix compression" : 0,
        "internal page key bytes discarded using suffix compression" : 0
    },
    "session" : {
        "object compaction" : 0,
        "open cursor count" : 11
    },
    "transaction" : {
        "update conflicts" : 0
    }
},
"nindexes" : 2,
"totalIndexSize" : 29560832,
"indexSizes" : {
    "_id_" : 14008320,
    "identity.identityHistory.from_1" : 15552512
},
"ok" : 1
}

如何以其他方式构建索引或查询以获得更好的性能?

1 个答案:

答案 0 :(得分:-1)

也许尝试这样:

  db.customer.aggregate( [     

            {$unwind : "$identity"},  
            {$unwind:"$identity.identityHistory"},
            {$match: { "identity.identityHistory.from" : {$lt:"2014-09-13 14:18:29.616"} , 
              "identity.identityHistory.to" :{$gt:"2014-08-30 09:26:24.842"}  }  },               
            {$project:{"identity.currentIdentity":1,"identity.identityHistory.identity" : 1 } }
        ])

并保留所有三个索引:

  

db.customer.createIndex({ "identity.identityHistory.from": 1, "identity.identityHistory.to": 1 })