我在 MongoDB 中有一个集合。由于底层源数据发生了变化,我不得不更新数据解析逻辑,之后才发现集合中出现了重复数据。
数据源变化后,代码出现了意外行为,插入了许多重复的文档。
以下查询应返回单个值:
// Fetch the documents in `opts` whose ticker, date, callPut, Strike and
// maturity equal the values below, ordered by maturity ascending. The filter
// and the sort are passed through the $query / $orderby wrapper form of find().
db.opts.find({
$query: {
ticker: "VXX",
date: 20150423,
callPut: "P",
Strike: 27,
maturity: 20150424
},
$orderby: {
maturity: 1
}
})
然而由于代码中的错误,这一条观测数据在集合中存在多条重复记录。其中一条记录如下:
{
"_id": ObjectId("55396c1c44fea47bde858c78"),
"date": 20150423,
"ticker": "VXX",
"callPut": "P",
"Last": 6.1,
"Vol": 25,
"Chg": 0.43,
"maturity": 20150424,
"Symbol": "VXX150424P00027000",
"Open Int": 809,
"Strike": 27,
"Ask": 6.1,
"Bid": 5.85
}
现在,我的目标是删除共享某些字段的重复项。
我尝试运行以下内容:
// Attempt to build a unique compound index over the six fields that should
// identify a single observation, passing dropDups so that documents with a
// duplicate key would be discarded while the index is built.
// NOTE: the dropDups option is no longer available as of MongoDB 3.0.
db.opts.ensureIndex({
date: 1,
ticker: 1,
callPut: 1,
maturity: 1,
Symbol: 1,
Strike: 1
}, {
unique: true,
dropDups: true
})
然而,重复项并没有被删除。
我另外尝试过:
// Second attempt: the same unique compound index specification and options,
// this time issued through createIndex() instead of ensureIndex().
// NOTE: the dropDups option is no longer available as of MongoDB 3.0.
db.opts.createIndex({
date: 1,
ticker: 1,
callPut: 1,
maturity: 1,
Symbol: 1,
Strike: 1
}, {
unique: true,
dropDups: true
})
除了上面记录的内容之外,我没有在这些字段上定义过任何索引。这个集合非常普通,没有任何特殊配置:我只是创建了它,然后每天向其中插入数据,除此之外什么都没做。
重复数据如下所示:
> db.opts.find({$query:{ticker:"VXX",date:20150423,callPut:"P",Strike:27}})
{ "_id" : ObjectId("55396c1c44fea47bde858c78"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1c44fea47bde858cd1"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1c44fea47bde858d2a"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858d83"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858ddc"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1d44fea47bde858e35"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858e8e"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858ee7"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1e44fea47bde858f40"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1f44fea47bde858f99"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c1f44fea47bde858ff2"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde85904b"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde8590a4"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2044fea47bde8590fd"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2144fea47bde859156"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2144fea47bde8591af"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde859208"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde859261"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2244fea47bde8592ba"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
{ "_id" : ObjectId("55396c2344fea47bde859313"), "date" : 20150423, "ticker" : "V
XX", "callPut" : "P", "Last" : 6.1, "Vol" : 25, "Chg" : 0.43, "maturity" : 20150
424, "Symbol" : "VXX150424P00027000", "Open Int" : 809, "Strike" : 27, "Ask" : 6
.1, "Bid" : 5.85 }
Type "it" for more
>
如何删除这些重复项?
答案 0 :(得分:0)
MongoDB 3.0 中不再提供 dropDups 选项,但您可以在 shell 中用一个遍历整个集合的小脚本轻松完成此操作,删除键值相同的重复文档:
/**
 * Remove duplicate documents from a collection, keeping the first document
 * encountered for each distinct combination of the given fields.
 *
 * A field that is missing from a document is treated the same as a field set
 * to null, because both are encoded as `null` in the lookup key.
 *
 * Memory use grows with the number of distinct key combinations, since every
 * key seen so far is held in an in-memory table.
 *
 * @param {Object} collection - Collection handle such as `db.opts`. It must
 *     provide `find()` returning a cursor with `forEach()`, and `remove()`.
 * @param {string[]} uniqueFields - Names of the fields that together must be
 *     unique across the collection.
 * @returns {number} The number of documents that were removed.
 */
function removeDuplicateDocs(collection, uniqueFields) {
  // Prototype-less table: a computed key can never be mistaken for an
  // inherited property such as "constructor" or "toString".
  var seen = Object.create(null);
  var removedCount = 0;

  collection.find().forEach(function (doc) {
    // Encode the values as a JSON array instead of concatenating them.
    // Plain concatenation is ambiguous ("A" + "BP" equals "AB" + "P") and
    // could delete a document that is not actually a duplicate.
    var key = JSON.stringify(uniqueFields.map(function (field) {
      return doc[field];
    }));

    if (seen[key]) {
      // A document with this key was already kept, so drop this one.
      collection.remove({_id: doc._id});
      removedCount += 1;
    } else {
      seen[key] = true;
    }
  });

  return removedCount;
}

// `db` is the global database handle provided by the mongo shell. The guard
// lets this file be loaded outside the shell without touching any data.
if (typeof db !== 'undefined') {
  removeDuplicateDocs(
    db.opts,
    ['date', 'ticker', 'callPut', 'maturity', 'Symbol', 'Strike']
  );
}
显然,请在执行此操作之前进行备份,以防它无法正常运行。