Question

在适用的情况下使用 Mongoose、Express 和 JSONStream 从 MongoDB 发送文档集时，我看到了一些意想不到的性能数字。我想比较 mongoose 的 find 和 stream。我原本期望对于较大的文档集合，流式传输会更快，但惊讶地发现 toArray 变体的表现始终优于流式变体。我怀疑不确定因素可能在于我使用 JSONStream 将响应通过管道传给 Express。我的简单端点如下：

// run this after connecting to mongoose

// Express application that the benchmark endpoints below are registered on.
var app = express();

var myModel = ...; // get mongoose model
// Collection object taken from the model; the /mongostream and /mongoarray
// endpoints query it directly instead of going through the model.
var myCollection = myModel.collection;

// fetch 500 - use lean w/ mongoose
// Query options shared by every endpoint below.
var queryOpts = { lean : true, limit : 500 };

// 35.958
// Pipes the mongoose query stream through JSONStream.stringify() into the
// HTTP response.
app.get("/api/v1/stream", function(req, res, next) {
    res.set('Content-Type', 'application/json');
    var docs = myModel.find({ }, null, queryOpts).stream();
    // pipe() does not forward 'error' events, and an 'error' emitted with no
    // listener is thrown, so hand query failures to Express explicitly.
    docs.on('error', next);
    docs.pipe(JSONStream.stringify()).pipe(res);
});

// 36.228
// Pipes the collection's cursor stream through JSONStream.stringify() into
// the HTTP response.
app.get("/api/v1/mongostream", function(req, res, next) {
    res.set('Content-Type', 'application/json');
    var docs = myCollection.find({ }, queryOpts).stream();
    // pipe() does not forward 'error' events, and an 'error' emitted with no
    // listener is thrown, so hand query failures to Express explicitly.
    docs.on('error', next);
    docs.pipe(JSONStream.stringify()).pipe(res);
});

// 23.399ms
// Buffers the whole result set with toArray() and sends it in one res.json().
app.get("/api/v1/mongoarray", function(req, res, next) {
    myCollection.find({ }, queryOpts)
    .toArray(function(err, results) {
        // Do not swallow query failures: let Express produce the error response.
        if (err) {
            return next(err);
        }
        res.json(results);
    });
});

// 23.908
// Runs the mongoose find() with a callback and sends the results in one
// res.json().
app.get("/api/v1/array", function(req, res, next) {
    myModel.find({ }, null, queryOpts, function(err, results) {
        // Do not swallow query failures: let Express produce the error response.
        if (err) {
            return next(err);
        }
        res.json(results);
    });
});

// Start accepting HTTP requests on port 4000.
app.listen(4000);

每个端点上方的注释表示 ab -k -n 1000 <endpoint> 报告的平均请求时间。令我感到惊讶的是，把游标流经 JSONStream 传输到 Express 响应，比一次性取出全部结果再发送要慢 50％。我原本以为流式传输数据会更好。

我是不是有什么地方明显做错了？我认为流式传输应该更快的想法是错的吗？如果 JSONStream 是罪魁祸首，那么把游标流式传输到 Express 响应的最佳方法是什么——如果我把它全部缓冲起来，那和数组变体又有什么区别？

请注意，DB服务器是mongo 2.4.x，mongo驱动程序是1.4.x。

更新：我只对提取/流式传输部分进行了计时，不包含序列化部分。数组变体和流式变体的耗时相近，数组变体略占优势（14.9ms 对 15.3ms）。以下是端点：

// No serialization - just timing
// all are nearly the same - slight edge to
// arrays
// Times the mongoose query stream from the start of the handler until the
// stream's 'end' event and responds with the elapsed time; no documents are
// serialized.
app.get("/api/v2/stream", function(req, res, next) {
    var start = process.hrtime();
    res.set('Content-Type', 'application/json');
    myModel.find({ }, null, queryOpts)
    .stream()
    // An 'error' event with no listener is thrown; route it to Express instead.
    .on('error', next)
    // NOTE(review): no 'data' listener is attached, so this relies on the
    // stream starting to flow on its own - confirm for the driver in use.
    .on('end', function() {
        // process.hrtime(start) is the elapsed [seconds, nanoseconds] pair.
        res.json(process.hrtime(start));
    });
});

// Times the collection's cursor stream from the start of the handler until
// the stream's 'end' event and responds with the elapsed time; no documents
// are serialized.
app.get("/api/v2/mongostream", function(req, res, next) {
    var start = process.hrtime();
    res.set('Content-Type', 'application/json');
    myCollection.find({ }, queryOpts)
    .stream()
    // An 'error' event with no listener is thrown; route it to Express instead.
    .on('error', next)
    // NOTE(review): no 'data' listener is attached, so this relies on the
    // stream starting to flow on its own - confirm for the driver in use.
    .on('end', function() {
        // process.hrtime(start) is the elapsed [seconds, nanoseconds] pair.
        res.json(process.hrtime(start));
    });
});

// Times collection.find().toArray() from the start of the handler until the
// callback fires and responds with the elapsed time; the results themselves
// are not sent.
app.get("/api/v2/mongoarray", function(req, res, next) {
    var start = process.hrtime();
    myCollection.find({ }, queryOpts)
    .toArray(function(err, results) {
        // A failed query must not be reported as a valid timing.
        if (err) {
            return next(err);
        }
        // process.hrtime(start) is the elapsed [seconds, nanoseconds] pair.
        res.json(process.hrtime(start));
    });
});

// Times the mongoose find() callback variant from the start of the handler
// until the callback fires and responds with the elapsed time; the results
// themselves are not sent.
app.get("/api/v2/array", function(req, res, next) {
    var start = process.hrtime();
    myModel.find({ }, null, queryOpts, function(err, results) {
        // A failed query must not be reported as a valid timing.
        if (err) {
            return next(err);
        }
        // process.hrtime(start) is the elapsed [seconds, nanoseconds] pair.
        res.json(process.hrtime(start));
    });
});

更新 2：collection.stats() 和 collection.find({}).explain() 的输出如下：

> db.myCollection.stats();
{
    "ns" : "myDb.myCollection",
    "count" : 1000,
    "size" : 419264,
    "avgObjSize" : 419.264,
    "storageSize" : 847872,
    "numExtents" : 4,
    "nindexes" : 2,
    "lastExtentSize" : 655360,
    "paddingFactor" : 1,
    "systemFlags" : 1,
    "userFlags" : 0,
    "totalIndexSize" : 98112,
    "indexSizes" : {
        "_id_" : 40880,
        "_meta.tags_1" : 57232
    },
    "ok" : 1
}
> db.myCollection.find({}).explain();
{
    "cursor" : "BasicCursor",
    "isMultiKey" : false,
    "n" : 1000,
    "nscannedObjects" : 1000,
    "nscanned" : 1000,
    "nscannedObjectsAllPlans" : 1000,
    "nscannedAllPlans" : 1000,
    "scanAndOrder" : false,
    "indexOnly" : false,
    "nYields" : 0,
    "nChunkSkips" : 0,
    "millis" : 0,
    "indexBounds" : {

    },
    "server" : "LOCAL:27017"
}

Answer 1

从 mongo shell 中查看 db.mycollection.stats() 和 db.mycollection.find({}).explain() 的输出会很有帮助。

考虑到您所看到的性能数据，可能只是对于足够大的集合，所有时间都花在了非索引查询的冷 B 树访问上，而不是 Node 本身的任何开销。

mongoose / mongodb 流与数组的性能对比

1 个答案: