我正在尝试使用 MongoDB 来保存测试运行文档,包括它们的测量值并尝试查询它们。我有大约 3650 万个文档,它们看起来像这样:
{
"_id" : ObjectId("60619f84dc46fad865680ab8"),
"uuid" : "47aad0e5-3e41-4951-8881-3ae70ae85bcd",
"id_tb" : ObjectId("60619f84dc46fad865680225"),
"code" : "",
"ser" : "",
"mat" : "",
"state" : "",
"ts_start" : ISODate("2018-01-01T18:46:56.000Z"),
"ts_end" : ISODate("2018-01-01T18:47:20.000Z"),
"ts_restart" : ISODate("2018-01-01T18:46:56.000Z"),
"values" : [
{
"dtype" : "i",
"v_int" : NumberLong(20030606),
"key" : "x"
},
{
"dtype" : "i",
"v_int" : NumberLong(1208551142),
"key" : "y"
},
...
]
每个测试运行文档都有一个数组“values”,其中包括 730 个子文档(每个测量值一个)。
我使用 C#,所以我的模型如下所示:
/// <summary>
/// One test-run document, including its list of measured values.
/// The stored BSON element name of each member is given by its
/// <c>BsonElement</c> attribute; where a <c>JsonProperty</c> attribute is
/// present it declares a separate JSON name.
/// </summary>
public class TestRun
{
    /// <summary>Document identifier (the member marked with <c>BsonId</c>).</summary>
    [BsonId, BsonRepresentation(BsonType.ObjectId)]
    public ObjectId Id { get; set; }

    /// <summary>BSON element <c>uuid</c>, represented as a string; JSON name <c>uuid</c>.</summary>
    [JsonProperty(PropertyName = "uuid")]
    [BsonElement("uuid"), BsonRepresentation(BsonType.String)]
    public string Uuid { get; set; }

    /// <summary>BSON element <c>id_tb</c>, represented as an ObjectId.</summary>
    [BsonElement("id_tb"), BsonRepresentation(BsonType.ObjectId)]
    public ObjectId IdTb { get; set; }

    /// <summary>BSON element <c>code</c>, represented as a string; JSON name <c>code</c>.</summary>
    [JsonProperty(PropertyName = "code")]
    [BsonElement("code"), BsonRepresentation(BsonType.String)]
    public string Code { get; set; }

    /// <summary>
    /// BSON element <c>ser</c>, represented as a string.
    /// Note that the JSON name differs from the BSON name: <c>serial</c>.
    /// </summary>
    [JsonProperty(PropertyName = "serial")]
    [BsonElement("ser"), BsonRepresentation(BsonType.String)]
    public string Serial { get; set; }

    /// <summary>BSON element <c>mat</c>, represented as a string; JSON name <c>mat</c>.</summary>
    [JsonProperty(PropertyName = "mat")]
    [BsonElement("mat"), BsonRepresentation(BsonType.String)]
    public string Mat { get; set; }

    /// <summary>
    /// BSON element <c>state</c>, represented as a string.
    /// Note that the JSON name differs from the BSON name: <c>overall</c>.
    /// </summary>
    [JsonProperty(PropertyName = "overall")]
    [BsonElement("state"), BsonRepresentation(BsonType.String)]
    public string State { get; set; }

    /// <summary>BSON element <c>ts_start</c>, represented as a DateTime; nullable.</summary>
    [BsonElement("ts_start"), BsonRepresentation(BsonType.DateTime)]
    public DateTime? TsStart { get; set; }

    /// <summary>BSON element <c>ts_end</c>, represented as a DateTime; nullable.</summary>
    [BsonElement("ts_end"), BsonRepresentation(BsonType.DateTime)]
    public DateTime? TsEnd { get; set; }

    /// <summary>BSON element <c>ts_restart</c>, represented as a DateTime; nullable.</summary>
    [BsonElement("ts_restart"), BsonRepresentation(BsonType.DateTime)]
    public DateTime? TsRestart { get; set; }

    /// <summary>BSON element <c>values</c>: the embedded <see cref="TestRunValue"/> entries.</summary>
    [BsonElement("values")]
    public List<TestRunValue> Values { get; set; }
}
/// <summary>
/// One entry of a test run's <c>values</c> array: a key, a data-type tag and
/// up to four optional typed value slots. Each <c>V*</c> member carries
/// <c>BsonIgnoreIfNull</c>, so a null slot is not written to the document.
/// </summary>
public class TestRunValue
{
    /// <summary>
    /// BSON element <c>dtype</c>.
    /// NOTE(review): presumably indicates which <c>V*</c> slot is populated — confirm.
    /// </summary>
    [BsonElement("dtype")]
    public string Datatype { get; set; }

    /// <summary>BSON element <c>v_bool</c>; omitted from the document when null.</summary>
    [BsonElement("v_bool"), BsonIgnoreIfNull]
    public bool? VBool { get; set; }

    /// <summary>BSON element <c>v_char</c>; omitted from the document when null.</summary>
    [BsonElement("v_char"), BsonIgnoreIfNull]
    public string? VChar { get; set; }

    /// <summary>BSON element <c>v_int</c> (64-bit); omitted from the document when null.</summary>
    [BsonElement("v_int"), BsonIgnoreIfNull]
    public long? VInt { get; set; }

    /// <summary>
    /// BSON element <c>v_real</c>; omitted from the document when null.
    /// NOTE(review): mapped to single precision here — verify that stored values
    /// fit a <c>float</c> without loss.
    /// </summary>
    [BsonElement("v_real"), BsonIgnoreIfNull]
    public float? VReal { get; set; }

    /// <summary>BSON element <c>key</c>.</summary>
    [BsonElement("key")]
    public string Key { get; set; }
}
我为要搜索的字段创建了索引:_id_、ser、mat、ts_start、ts_end、state 和 code。
总索引大小约为 1.8 GB,平均 257.2 MB,其中 state
最大为 392.6 MB,ts_start
最小为 164.9 MB。
测试运行文档平均为 52.3 KB。
为了测试查询这个集合,我使用了一个 .NET Core 控制台应用程序。查询如下所示:
(现在我使用下文列出的参数运行这个查询。)
var builder = Builders<TestRun>.Filter;
// Compose the filter step by step: begin with the match-all filter and AND in
// one clause for every search criterion that was actually supplied.
FilterDefinition<TestRun> filter = builder.Empty;

// Time window: ts_start >= startTime and ts_end <= endTime.
if (startTime != null)
{
    // The filter is still empty at this point, so the clause replaces it.
    filter = builder.Gte(t => t.TsStart, startTime);
}
if (endTime != null)
{
    filter &= builder.Lte(t => t.TsEnd, endTime);
}

// Set-membership criteria; a null or empty list adds no clause.
if (mats?.Count > 0)
{
    filter &= builder.In(t => t.Mat, mats);
}
if (sers?.Count > 0)
{
    filter &= builder.In(t => t.Serial, sers);
}
if (codes?.Count > 0)
{
    filter &= builder.In(t => t.Code, codes);
}
if (states?.Count > 0)
{
    filter &= builder.In(t => t.State, states);
}

// The comment tags the operation as "MyQuery"; the result size is capped.
var findOptions = new FindOptions { Comment = "MyQuery" };
var query = context._testruns.Find(filter, findOptions).Limit(maxNumValues);
Console.WriteLine($"Running query: \n {query.ToString()}");

// Time only the execution and materialisation of the result list.
var stopwatch = Stopwatch.StartNew();
List<TestRun> trs = query.ToList();
stopwatch.Stop();
Console.WriteLine($"Query took {stopwatch.ElapsedMilliseconds} ms.");
运行时 startTime 为 2018-01-01T00:00:00.000Z,endTime 为 2018-01-01T23:59:59.000Z,maxNumValues 为 5000,导致以下查询:
find({
"ts_start" : { "$gte" : ISODate("2018-01-01T00:00:00Z") },
"ts_end" : { "$lte" : ISODate("2018-01-01T23:59:59Z") }
}).limit(5000)._addSpecial("$comment", "MyQuery")
这需要很长时间才能完成。即使过了 30 分钟,它也没有完成。
即使我使用较短的时间段 (2018-01-01T23:00:00Z - 2018-01-01T23:59:59Z) 和较小的限制 (200),也需要很长时间。
我测试了更多限制:
在运行限制为 81(在 Robo 3T 中)的解释时,它似乎在 40286 毫秒内执行:
我尝试使用 mongo shell 而不是 Robo 3T 运行 explain("executionStats")。限制为 81 时的 explain 输出如下:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "db.runs",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"ts_end" : {
"$lte" : ISODate("2018-01-01T23:59:59.000Z")
}
},
{
"ts_start" : {
"$gte" : ISODate("2018-01-01T23:00:00.000Z")
}
}
]
},
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 81,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ts_start" : {
"$gte" : ISODate("2018-01-01T23:00:00.000Z")
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"ts_end" : 1
},
"indexName" : "ts_end",
"isMultiKey" : false,
"multiKeyPaths" : {
"ts_end" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"ts_end" : [
"(true, new Date(1514851199000)]"
]
}
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 81,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ts_end" : {
"$lte" : ISODate("2018-01-01T23:59:59.000Z")
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"ts_start" : 1
},
"indexName" : "ts_start",
"isMultiKey" : false,
"multiKeyPaths" : {
"ts_start" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"ts_start" : [
"[new Date(1514847600000), new Date(9223372036854775807)]"
]
}
}
}
}
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 80,
"executionTimeMillis" : 40286,
"totalKeysExamined" : 248196,
"totalDocsExamined" : 248196,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 80,
"executionTimeMillisEstimate" : 11268,
"works" : 248198,
"advanced" : 80,
"needTime" : 248116,
"needYield" : 0,
"saveState" : 4036,
"restoreState" : 4036,
"isEOF" : 1,
"limitAmount" : 81,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"ts_start" : {
"$gte" : ISODate("2018-01-01T23:00:00.000Z")
}
},
"nReturned" : 80,
"executionTimeMillisEstimate" : 11263,
"works" : 248198,
"advanced" : 80,
"needTime" : 248116,
"needYield" : 0,
"saveState" : 4036,
"restoreState" : 4036,
"isEOF" : 1,
"docsExamined" : 248196,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 248196,
"executionTimeMillisEstimate" : 228,
"works" : 248197,
"advanced" : 248196,
"needTime" : 0,
"needYield" : 0,
"saveState" : 4036,
"restoreState" : 4036,
"isEOF" : 1,
"keyPattern" : {
"ts_end" : 1
},
"indexName" : "ts_end",
"isMultiKey" : false,
"multiKeyPaths" : {
"ts_end" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"ts_end" : [
"(true, new Date(1514851199000)]"
]
},
"keysExamined" : 248196,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
},
"serverInfo" : {
"host" : "localhost",
"port" : 27017,
"version" : "4.2.13",
"gitVersion" : "82dd40f60c55dae12426c08fd7150d79a0e28e23"
},
"ok" : 1.0,
"$clusterTime" : {
"clusterTime" : Timestamp(1620127924, 1),
"signature" : {
"hash" : { "$binary" : "AAAAAAAAAAAAAAAAAAAAAAAAAAA=", "$type" : "00" },
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1620127924, 1)
}
以查看它是否有所作为。
这需要很长时间,我在资源监视器中看到的是,在运行时,mongod 和 System 正在从磁盘读取数据。
我的控制台应用程序和 MongoDB(版本 4.2)都在具有 32GB RAM 的 Windows Server 2012 上运行。
有人对如何提高性能或我如何找到导致这种情况的原因有任何想法吗?
编辑:
这是一个耗时很长的查询的 explain()
输出:
explain("executionStats")