Question

MongoDB C++ 驱动程序提供了多种创建 BSON 对象的方式，其中包括以下两种。

基于流：

// Stream-based construction: keys and values are streamed into the builder.
// open_array/close_array and open_document/close_document delimit the nested
// array and sub-document; streaming finalize produces the document value.
// Every manipulator is fully qualified because no using-declaration is in
// scope in this snippet.
auto builder = bsoncxx::builder::stream::document{};
bsoncxx::document::value doc_value = builder
  << "name" << "MongoDB"
  << "type" << "database"
  << "count" << 1
  << "versions" << bsoncxx::builder::stream::open_array
    << "v3.2" << "v3.0" << "v2.6"
  << bsoncxx::builder::stream::close_array
  << "info" << bsoncxx::builder::stream::open_document
    << "x" << 203
    << "y" << 102
  << bsoncxx::builder::stream::close_document
  << bsoncxx::builder::stream::finalize;

基于解析JSON字符串：

// JSON-based construction: the same document is written as a JSON string
// literal (adjacent literals are concatenated by the compiler) and then
// converted into a BSON document value with bsoncxx::from_json.
std::string doc = "{ "
  "\"name\" : \"MongoDB\","
  "\"type\" : \"database\","
  "\"count\" : 1,"
  "\"versions\": [ \"v3.2\", \"v3.0\", \"v2.6\" ],"
  "\"info\" : {"
    "\"x\" : 203,"
    "\"y\" : 102"
  "}"
"}";
// The result must be bound to a named variable; the original line omitted
// the variable name and '=' and was not a valid declaration.
bsoncxx::document::value doc_value = bsoncxx::from_json(doc);

从性能的角度来看，我想知道哪一种方式更好。我倾向于认为，基于流的方案在底层（under the hood）涉及大量函数调用，其开销会比解析 JSON 字符串更大，但实际情况也可能相反，或者两者相当。

我尝试在 MongoDB C++ 驱动程序文档中查找这方面的信息，但没有找到。非常欢迎任何相关信息……在此先谢谢了！

Answer 1

最后我做了一些基准测试。我正在分享我的结果，以防它们对其他人有用。驱动程序版本为3.4.0。

这是基于流的版本：

#include <iostream>

#include <bsoncxx/builder/stream/document.hpp>
#include <bsoncxx/json.hpp>

#include <mongocxx/client.hpp>
#include <mongocxx/instance.hpp>


int main(int, char**) {
    mongocxx::instance inst{};
    mongocxx::client conn{mongocxx::uri{}};

    for (unsigned int ix = 0; ix < 10000000 ; ++ix) {
       auto builder = bsoncxx::builder::stream::document{};
       bsoncxx::document::value doc_value = builder
      << "name" << "MongoDB"
      << "type" << "database"
      << "count" << 1
      << "versions" << bsoncxx::builder::stream::open_array
        << "v3.2" << "v3.0" << "v2.6"
      << bsoncxx::builder::stream::close_array
      << "info" << bsoncxx::builder::stream::open_document
        << "x" << 203
        << "y" << 102
      << bsoncxx::builder::stream::close_document
          << bsoncxx::builder::stream::finalize;
    }
}

这是基于文本解析的版本：

#include <iostream>

#include <bsoncxx/builder/stream/document.hpp>
#include <bsoncxx/json.hpp>

#include <mongocxx/client.hpp>
#include <mongocxx/instance.hpp>


// Benchmark driver for JSON parsing: builds the same sample document
// 10,000,000 times by parsing a JSON string with bsoncxx::from_json.
int main(int, char**) {
    constexpr unsigned int kIterations = 10000000;

    // Driver instance and client are created before the loop, exactly as in
    // the other benchmark programs; the loop itself never uses the client.
    mongocxx::instance driver_instance{};
    mongocxx::client client{mongocxx::uri{}};

    for (unsigned int i = 0; i < kIterations; ++i) {
        // The std::string is constructed inside the loop, so its construction
        // is part of the work measured on every iteration.
        std::string json_text =
            "{ "
            "\"name\" : \"MongoDB\","
            "\"type\" : \"database\","
            "\"count\" : 1,"
            "\"versions\": [ \"v3.2\", \"v3.0\", \"v2.6\" ],"
            "\"info\" : {"
            "\"x\" : 203,"
            "\"y\" : 102"
            "}"
            "}";
        bsoncxx::document::value doc_value = bsoncxx::from_json(json_text);
    }
}

如您所见，在两种情况下，程序的结构和迭代次数（10,000,000）是相同的。

编译使用：

c++ --std=c++11 test-stream.cpp -o test-stream $(pkg-config --cflags --libs libmongocxx)
c++ --std=c++11 test-textparsing.cpp -o test-textparsing $(pkg-config --cflags --libs libmongocxx)

test-stream 的结果（运行三次）：

$ time ./test-stream ; time ./test-stream ; time ./test-stream 

real    0m16,454s
user    0m16,200s
sys 0m0,084s

real    0m17,034s
user    0m16,900s
sys 0m0,012s

real    0m18,812s
user    0m18,708s
sys 0m0,036s

test-textparsing 的结果（同样运行三次）：

$ time ./test-textparsing ; time ./test-textparsing ; time ./test-textparsing 

real    0m53,678s
user    0m53,576s
sys 0m0,024s

real    1m0,203s
user    0m59,788s
sys 0m0,116s

real    0m57,259s
user    0m56,824s
sys 0m0,200s

结论：总体而言，基于流的策略要优于基于文本的策略。

欢迎大家复核这个实验，这将非常有助于确认结果 ;)

编辑：我又添加了一个基于基本构建器（basic builder）的测试用例：

#include <iostream>

#include <bsoncxx/builder/stream/document.hpp>
#include <bsoncxx/json.hpp>

#include <mongocxx/client.hpp>
#include <mongocxx/instance.hpp>

using bsoncxx::builder::basic::kvp;

int main(int, char**) {
    mongocxx::instance inst{};
    mongocxx::client conn{mongocxx::uri{}};

    for (unsigned int ix = 0; ix < 10000000 ; ++ix) {
       bsoncxx::builder::basic::document basic_builder{};
       basic_builder.append(kvp("name", "MongoDB"));
       basic_builder.append(kvp("type", "database"));
       basic_builder.append(kvp("count", 1));

       bsoncxx::builder::basic::array array_builder{};
       array_builder.append("v3.2");
       array_builder.append("v3.0");
       array_builder.append("v2.6");
       basic_builder.append(kvp("versions", array_builder.extract()));  

       bsoncxx::builder::basic::document object_builder{};
       object_builder.append(kvp("x", 203));
       object_builder.append(kvp("y", 102));
       basic_builder.append(kvp("info", object_builder.extract()));  

       bsoncxx::document::value doc_value = basic_builder.extract();
    }
}

以这种方式编译：

c++ --std=c++11 test-basic.cpp -o test-basic $(pkg-config --cflags --libs libmongocxx)

我重新运行了全部测试，结果如下：

basic
-----

real    0m20,725s
user    0m20,656s
sys 0m0,004s

real    0m20,651s
user    0m20,620s
sys 0m0,008s

real    0m20,102s
user    0m20,088s
sys 0m0,000s

stream
------

real    0m11,841s
user    0m11,780s
sys 0m0,024s

real    0m11,967s
user    0m11,932s
sys 0m0,008s

real    0m11,634s
user    0m11,616s
sys 0m0,008s

textparsing
-----------

real    0m37,209s
user    0m37,184s
sys 0m0,004s

real    0m36,336s
user    0m36,208s
sys 0m0,028s

real    0m35,840s
user    0m35,648s
sys 0m0,048s

结论：

金牌：基于流的方法
银牌：基本构建器方法（与基于流的方法相比，时间增加了81.8％）
铜牌：文本解析方法（与基于流的方法相比，时间增加了227.7％）

在开始实验之前，我本以为基本构建器会胜出，但最终胜出的却是基于流的方法。是我的 test-basic.cpp 代码有什么问题吗？还是说这个结果是合理的？

MongoDB C++ 驱动程序的 BSON 构建：基于流与基于字符串解析，哪一种性能更好？

1 个答案: