如何在没有id的情况下删除重复数据

时间:2015-05-12 07:36:52

标签: mongodb

我需要将大量数据导入mongoDB。

但是其中很多数据可能会重复,如何以优雅的方式删除 MongoDB 中的重复数据?

在导入每条新记录之前都先检查 Collection.find({ALL_THE_FIELDS}).exist? 的做法既丑陋又低效。

建立索引(index)的做法同样低效,因为您需要为数百个字段编制索引。

我想先导入所有数据,稍后再删除集合中的重复记录。

该怎么做?谢谢。

以下是一个文档示例

我需要导入数百万个文档,其中会有许多重复的文档,所以我需要找到一种方法来删除这些重复的文档。

{
  "_id": ObjectId("5550649669702d1901070000"),
  "report_type": "com_disagg",
  "item": "WHEAT",
  "exchange": "CHICAGO BOARD OF TRADE",
  "product_exchange": "WHEAT - CHICAGO BOARD OF TRADE",
  "date": new Date("2009-12-08T08:00:00+0800"),
  "timestamp_utc": NumberLong(1260230400000),
  "updated_at": new Date(1431331990505),
  "created_at": new Date(1431331990498),
  "symbol_code_id": ObjectId("5550645169702d1864020000"),
  "symbol": "W",
  "CFTC_Contract_Market_Code": "001602",
  "CFTC_Market_Code": "CBT",
  "CFTC_Region_Code": "00",
  "CFTC_Commodity_Code": "001",
  "Open_Interest_All": "421790.0",
  "Prod_Merc_Positions_Long_ALL": "34399.0",
  "Prod_Merc_Positions_Short_ALL": "176446.0",
  "Swap_Positions_Long_All": "167220.0",
  "Swap__Positions_Short_All": "18118.0",
  "Swap__Positions_Spread_All": "11837.0",
  "M_Money_Positions_Long_ALL": "61511.0",
  "M_Money_Positions_Short_ALL": "42756.0",
  "M_Money_Positions_Spread_ALL": "34393.0",
  "Other_Rept_Positions_Long_ALL": "14072.0",
  "Other_Rept_Positions_Short_ALL": "21546.0",
  "Other_Rept_Positions_Spread_ALL": "65296.0",
  "Tot_Rept_Positions_Long_All": "388728.0",
  "Tot_Rept_Positions_Short_All": "370392.0",
  "NonRept_Positions_Long_All": "33062.0",
  "NonRept_Positions_Short_All": "51398.0",
  "Open_Interest_Old": "281017.0",
  "Prod_Merc_Positions_Long_Old": "16525.0",
  "Prod_Merc_Positions_Short_Old": "122260.0",
  "Swap_Positions_Long_Old": "117778.0",
  "Swap_Positions_Short_Old": "5864.0",
  "Swap_Positions_Spread_Old": "4270.0",
  "M_Money_Positions_Long_Old": "54489.0",
  "M_Money_Positions_Short_Old": "44462.0",
  "M_Money_Positions_Spread_Old": "10319.0",
  "Other_Rept_Positions_Long_Old": "9407.0",
  "Other_Rept_Positions_Short_Old": "15516.0",
  "Other_Rept_Positions_Spread_Old": "47020.0",
  "Tot_Rept_Positions_Long_Old": "259809.0",
  "Tot_Rept_Positions_Short_Old": "249712.0",
  "NonRept_Positions_Long_Old": "21208.0",
  "NonRept_Positions_Short_Old": "31304.0",
  "Open_Interest_Other": "140773.0",
  "Prod_Merc_Positions_Long_Other": "17875.0",
  "Prod_Merc_Positions_Short_Other": "54186.0",
  "Swap_Positions_Long_Other": "51313.0",
  "Swap_Positions_Short_Other": "14125.0",
  "Swap_Positions_Spread_Other": "5696.0",
  "M_Money_Positions_Long_Other": "21387.0",
  "M_Money_Positions_Short_Other": "12659.0",
  "M_Money_Positions_Spread_Other": "9708.0",
  "Other_Rept_Positions_Long_Other": "7275.0",
  "Other_Rept_Positions_Short_Other": "8640.0",
  "Other_Rept_Positions_Spread_Othr": "15665.0",
  "Tot_Rept_Positions_Long_Other": "128919.0",
  "Tot_Rept_Positions_Short_Other": "120679.0",
  "NonRept_Positions_Long_Other": "11855.0",
  "NonRept_Positions_Short_Other": "20094.0",
  "Change_in_Open_Interest_All": "88.0",
  "Change_in_Prod_Merc_Long_All": "-287.0",
  "Change_in_Prod_Merc_Short_All": "-4032.0",
  "Change_in_Swap_Long_All": "-768.0",
  "Change_in_Swap_Short_All": "1679.0",
  "Change_in_Swap_Spread_All": "268.0",
  "Change_in_M_Money_Long_All": "-1992.0",
  "Change_in_M_Money_Short_All": "2243.0",
  "Change_in_M_Money_Spread_All": "45.0",
  "Change_in_Other_Rept_Long_All": "1475.0",
  "Change_in_Other_Rept_Short_All": "-4019.0",
  "Change_in_Other_Rept_Spread_All": "1368.0",
  "Change_in_Tot_Rept_Long_All": "110.0",
  "Change_in_Tot_Rept_Short_All": "-2449.0",
  "Change_in_NonRept_Long_All": "-22.0",
  "Change_in_NonRept_Short_All": "2537.0",
  "Pct_of_Open_Interest_All": "100.0",
  "Pct_of_OI_Prod_Merc_Long_All": "8.2",
  "Pct_of_OI_Prod_Merc_Short_All": "41.8",
  "Pct_of_OI_Swap_Long_All": "39.6",
  "Pct_of_OI_Swap_Short_All": "4.3",
  "Pct_of_OI_Swap_Spread_All": "2.8",
  "Pct_of_OI_M_Money_Long_All": "14.6",
  "Pct_of_OI_M_Money_Short_All": "10.1",
  "Pct_of_OI_M_Money_Spread_All": "8.2",
  "Pct_of_OI_Other_Rept_Long_All": "3.3",
  "Pct_of_OI_Other_Rept_Short_All": "5.1",
  "Pct_of_OI_Other_Rept_Spread_All": "15.5",
  "Pct_of_OI_Tot_Rept_Long_All": "92.2",
  "Pct_of_OI_Tot_Rept_Short_All": "87.8",
  "Pct_of_OI_NonRept_Long_All": "7.8",
  "Pct_of_OI_NonRept_Short_All": "12.2",
  "Pct_of_Open_Interest_Old": "100.0",
  "Pct_of_OI_Prod_Merc_Long_Old": "5.9",
  "Pct_of_OI_Prod_Merc_Short_Old": "43.5",
  "Pct_of_OI_Swap_Long_Old": "41.9",
  "Pct_of_OI_Swap_Short_Old": "2.1",
  "Pct_of_OI_Swap_Spread_Old": "1.5",
  "Pct_of_OI_M_Money_Long_Old": "19.4",
  "Pct_of_OI_M_Money_Short_Old": "15.8",
  "Pct_of_OI_M_Money_Spread_Old": "3.7",
  "Pct_of_OI_Other_Rept_Long_Old": "3.3",
  "Pct_of_OI_Other_Rept_Short_Old": "5.5",
  "Pct_of_OI_Other_Rept_Spread_Old": "16.7",
  "Pct_of_OI_Tot_Rept_Long_Old": "92.5",
  "Pct_of_OI_Tot_Rept_Short_Old": "88.9",
  "Pct_of_OI_NonRept_Long_Old": "7.5",
  "Pct_of_OI_NonRept_Short_Old": "11.1",
  "Pct_of_Open_Interest_Other": "100.0",
  "Pct_of_OI_Prod_Merc_Long_Other": "12.7",
  "Pct_of_OI_Prod_Merc_Short_Other": "38.5",
  "Pct_of_OI_Swap_Long_Other": "36.5",
  "Pct_of_OI_Swap_Short_Other": "10.0",
  "Pct_of_OI_Swap_Spread_Other": "4.0",
  "Pct_of_OI_M_Money_Long_Other": "15.2",
  "Pct_of_OI_M_Money_Short_Other": "9.0",
  "Pct_of_OI_M_Money_Spread_Other": "6.9",
  "Pct_of_OI_Other_Rept_Long_Other": "5.2",
  "Pct_of_OI_Other_Rept_Short_Other": "6.1",
  "Pct_of_OI_Other_Rept_Spread_Othr": "11.1",
  "Pct_of_OI_Tot_Rept_Long_Other": "91.6",
  "Pct_of_OI_Tot_Rept_Short_Other": "85.7",
  "Pct_of_OI_NonRept_Long_Other": "8.4",
  "Pct_of_OI_NonRept_Short_Other": "14.3",
  "Traders_Tot_All": "359.0",
  "Traders_Prod_Merc_Long_All": "48.0",
  "Traders_Prod_Merc_Short_All": "76.0",
  "Traders_Swap_Long_All": "18.0",
  "Traders_Swap_Short_All": "9.0",
  "Traders_Swap_Spread_All": "21.0",
  "Traders_M_Money_Long_All": "62.0",
  "Traders_M_Money_Short_All": "60.0",
  "Traders_M_Money_Spread_All": "45.0",
  "Traders_Other_Rept_Long_All": "59.0",
  "Traders_Other_Rept_Short_All": "61.0",
  "Traders_Other_Rept_Spread_All": "74.0",
  "Traders_Tot_Rept_Long_All": "263.0",
  "Traders_Tot_Rept_Short_All": "276.0",
  "Traders_Tot_Old": "337.0",
  "Traders_Prod_Merc_Long_Old": "40.0",
  "Traders_Prod_Merc_Short_Old": "72.0",
  "Traders_Swap_Long_Old": "19.0",
  "Traders_Swap_Short_Old": "8.0",
  "Traders_Swap_Spread_Old": "13.0",
  "Traders_M_Money_Long_Old": "59.0",
  "Traders_M_Money_Short_Old": "56.0",
  "Traders_M_Money_Spread_Old": "34.0",
  "Traders_Other_Rept_Long_Old": "50.0",
  "Traders_Other_Rept_Short_Old": "65.0",
  "Traders_Other_Rept_Spread_Old": "61.0",
  "Traders_Tot_Rept_Long_Old": "224.0",
  "Traders_Tot_Rept_Short_Old": "253.0",
  "Traders_Tot_Other": "206.0",
  "Traders_Prod_Merc_Long_Other": "25.0",
  "Traders_Prod_Merc_Short_Other": "62.0",
  "Traders_Swap_Long_Other": "11.0",
  "Traders_Swap_Short_Other": "11.0",
  "Traders_Swap_Spread_Other": "17.0",
  "Traders_M_Money_Long_Other": "18.0",
  "Traders_M_Money_Short_Other": "22.0",
  "Traders_M_Money_Spread_Other": "16.0",
  "Traders_Other_Rept_Long_Other": "37.0",
  "Traders_Other_Rept_Short_Other": "37.0",
  "Traders_Other_Rept_Spread_Other": "46.0",
  "Traders_Tot_Rept_Long_Other": "133.0",
  "Traders_Tot_Rept_Short_Other": "171.0",
  "Conc_Gross_LE_4_TDR_Long_All": "21.3",
  "Conc_Gross_LE_4_TDR_Short_All": "21.3",
  "Conc_Gross_LE_8_TDR_Long_All": "35.4",
  "Conc_Gross_LE_8_TDR_Short_All": "30.0",
  "Conc_Net_LE_4_TDR_Long_All": "20.5",
  "Conc_Net_LE_4_TDR_Short_All": "18.1",
  "Conc_Net_LE_8_TDR_Long_All": "34.2",
  "Conc_Net_LE_8_TDR_Short_All": "24.6",
  "Conc_Gross_LE_4_TDR_Long_Old": "27.3",
  "Conc_Gross_LE_4_TDR_Short_Old": "22.1",
  "Conc_Gross_LE_8_TDR_Long_Old": "39.6",
  "Conc_Gross_LE_8_TDR_Short_Old": "31.4",
  "Conc_Net_LE_4_TDR_Long_Old": "27.2",
  "Conc_Net_LE_4_TDR_Short_Old": "20.0",
  "Conc_Net_LE_8_TDR_Long_Old": "38.9",
  "Conc_Net_LE_8_TDR_Short_Old": "27.9",
  "Conc_Gross_LE_4_TDR_Long_Other": "37.4",
  "Conc_Gross_LE_4_TDR_Short_Other": "27.6",
  "Conc_Gross_LE_8_TDR_Long_Other": "49.2",
  "Conc_Gross_LE_8_TDR_Short_Other": "37.0",
  "Conc_Net_LE_4_TDR_Long_Other": "35.5",
  "Conc_Net_LE_4_TDR_Short_Other": "22.8",
  "Conc_Net_LE_8_TDR_Long_Other": "42.6",
  "Conc_Net_LE_8_TDR_Short_Other": "29.0",
  "Contract_Units": "(CONTRACTS OF 5,000 BUSHELS)",
  "CFTC_SubGroup_Code": "A10",
  "FutOnly_or_Combined": "Combined"
}

0 个答案:

没有答案