在 MongoDB 中删除重复项的结果不符合预期

时间:2015-04-25 20:57:38

标签: mongodb

我在 MongoDB 中有一个集合,在根据底层源数据的变化对数据解析做了必要的更新之后,我发现其中出现了重复数据。

由于源代码的变化,代码出现意外行为并插入了许多重复项。

以下查询应返回单个值:

// Look up the documents for one option quote, identified by ticker, date,
// callPut, Strike and maturity, with results ordered by maturity ascending.
db.opts.find({
  // Filter: all five fields must match exactly.
  $query: {
    ticker: "VXX",
    date: 20150423,
    callPut: "P",
    Strike: 27,
    maturity: 20150424
  },
  // Sort specification: 1 means ascending.
  $orderby: {
    maturity: 1
  }
})

然而,由于代码中的错误,很遗憾这一条观测数据存在多条记录。其中一条记录如下:

{
  "_id": ObjectId("55396c1c44fea47bde858c78"),
  "date": 20150423,
  "ticker": "VXX",
  "callPut": "P",
  "Last": 6.1,
  "Vol": 25,
  "Chg": 0.43,
  "maturity": 20150424,
  "Symbol": "VXX150424P00027000",
  "Open Int": 809,
  "Strike": 27,
  "Ask": 6.1,
  "Bid": 5.85
}

现在,我的目标是删除共享某些字段的重复项。

我尝试运行以下内容:

// First attempt: request a unique compound index over the six fields that
// together should identify a document, asking the server to drop duplicates
// while the index is built.
// NOTE(review): dropDups is reportedly unavailable as of MongoDB 3.0 --
// confirm the server version before relying on it.
db.opts.ensureIndex({
  date: 1,
  ticker: 1,
  callPut: 1,
  maturity: 1,
  Symbol: 1,
  Strike: 1
}, {
  unique: true,
  dropDups: true
})

然而,重复项并没有被删除。

我另外尝试过:

// Second attempt: the same unique compound index specification and options,
// issued through createIndex instead of ensureIndex.
// NOTE(review): dropDups is reportedly unavailable as of MongoDB 3.0 --
// confirm the server version before relying on it.
db.opts.createIndex({
  date: 1,
  ticker: 1,
  callPut: 1,
  maturity: 1,
  Symbol: 1,
  Strike: 1
}, {
  unique: true,
  dropDups: true
})

除了上面记录的内容之外,我没有在这些字段上定义过索引。该集合非常普通(vanilla):我只是创建了它,然后每天向其中插入数据,除此之外没有做过别的处理。

重复数据如下所示:

> db.opts.find({$query:{ticker:"VXX",date:20150423,callPut:"P",Strike:27}})
{ "_id" : ObjectId("55396c1c44fea47bde858c78"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1c44fea47bde858cd1"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1c44fea47bde858d2a"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858d83"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858ddc"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858e35"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858e8e"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858ee7"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858f40"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1f44fea47bde858f99"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1f44fea47bde858ff2"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde85904b"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde8590a4"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde8590fd"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2144fea47bde859156"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2144fea47bde8591af"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde859208"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde859261"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde8592ba"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2344fea47bde859313"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
Type "it" for more
>

如何删除这些重复项?

1 个答案:

答案 0 :(得分:0)

MongoDB 3.0 中不再提供 dropDups 选项,但您可以在 shell 中用一个小脚本轻松完成此操作:遍历整个集合,并删除键值与已出现文档相同的重复文档:

// Remove every document in db.opts that repeats an already-seen combination
// of date, ticker, callPut, maturity, Symbol and Strike. The first document
// encountered for each combination is kept; later ones are removed by _id.
var uniqueFields = ['date', 'ticker', 'callPut', 'maturity', 'Symbol', 'Strike'];
var keys = {};
db.opts.find().forEach(function(doc) {
    // Build the key as a JSON array of the field values so the boundaries
    // between values are preserved. Plain concatenation would map different
    // tuples to the same key (e.g. Symbol "A1" + Strike 2 and Symbol "A" +
    // Strike 12 both give "A12") and delete a document that is not a duplicate.
    var key = JSON.stringify(uniqueFields.map(function(field) {
        return doc[field];
    }));
    // Check own properties only, so names inherited from Object.prototype
    // can never be mistaken for a previously seen key.
    if (Object.prototype.hasOwnProperty.call(keys, key)) {
        // A doc with this key has already been seen, so remove this doc.
        db.opts.remove({_id: doc._id});
    } else {
        keys[key] = true;
    }
});

当然,请在执行此操作之前先做好备份,以防脚本未按预期运行。