Mongodb对复杂的JSON进行聚合,匹配,分组和排序

时间:2015-11-09 09:35:03

标签: json mongodb sabre

我收到了大量的JSON(飞行数据),我一直在考虑使用mongo作为查询/过滤它的工具,然后再返回UI。

这是来自 Sabre(航班搜索)的 JSON 响应:

{
  "OTA_AirLowFareSearchRS": {
    "PricedItineraries": {
      "PricedItinerary": [
        {
          "SequenceNumber": 1,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T11:40:00",
                      "ArrivalDateTime": "2015-11-12T17:35:00",
                      "FlightNumber": "1980"
                    },
                    {
                      "DepartureDateTime": "2015-11-12T19:35:00",
                      "ArrivalDateTime": "2015-11-13T02:00:00",
                      "FlightNumber": "760"
                    }
                  ]
                },
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T08:25:00",
                      "ArrivalDateTime": "2015-11-19T11:40:00",
                      "FlightNumber": "763"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56,
                  "CurrencyCode": "GBP"
                }
              }
            }
          ]
        },
        {
          "SequenceNumber": 2,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T16:45:00",
                      "ArrivalDateTime": "2015-11-12T22:40:00",
                      "FlightNumber": "1986"
                    },
                    {
                      "DepartureDateTime": "2015-11-13T00:40:00",
                      "ArrivalDateTime": "2015-11-13T07:10:00",
                      "FlightNumber": "762"
                    }
                  ]
                },
                {
                  "ElapsedTime": 640,
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T08:25:00",
                      "ArrivalDateTime": "2015-11-19T11:40:00",
                      "FlightNumber": "763"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56,
                  "CurrencyCode": "GBP"
                }
              }
            }
          ]
        },
        {
          "SequenceNumber": 6,
          "AirItinerary": {
            "OriginDestinationOptions": {
              "OriginDestinationOption": [
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-12T11:40:00",
                      "ArrivalDateTime": "2015-11-12T17:35:00",
                      "FlightNumber": "1980"
                    },
                    {
                      "DepartureDateTime": "2015-11-12T19:35:00",
                      "ArrivalDateTime": "2015-11-13T02:00:00",
                      "FlightNumber": "760"
                    }
                  ]
                },
                {
                  "FlightSegment": [
                    {
                      "DepartureDateTime": "2015-11-19T03:15:00",
                      "ArrivalDateTime": "2015-11-19T06:30:00",
                      "FlightNumber": "761"
                    },
                    {
                      "DepartureDateTime": "2015-11-19T12:55:00",
                      "ArrivalDateTime": "2015-11-19T15:05:00",
                      "FlightNumber": "1985"
                    }
                  ]
                }
              ]
            }
          },
          "AirItineraryPricingInfo": [
            {
              "ItinTotalFare": {
                "TotalFare": {
                  "Amount": 269.56
                }
              }
            }
          ]
        }
      ]
    }
  }
}

我一直在尝试将其转换为我想要的视图,但却在努力应对所有嵌套数组的复杂性。我的问题是如何才能达到这样的预期结果:

{
    'Price': 269.56, <-- //Group on price (TotalFare)
    'Outbound': [{
        <Outbound Flights> <--- //$push? flights at array position [0] of OriginDestinationOption 
    }],
    'Inbound': [{
        <Inbound Flights> <-- // flights at array position [1] of OriginDestinationOption
    }]
},
...

JSON中这些数据的位置是:

Price: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItineraryPricingInfo[0].ItinTotalFare.TotalFare.Amount;
Outbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[0]
Inbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[1]

1 个答案:

答案 0 :(得分:3)

使用当前的MongoDB版本,唯一可以获得接近您想要的结果的方法是使用 aggregation framework ,并假设 OriginDestinationOption 数组始终只有两个元素:先用 $unwind 运算符展开数组,再用 $first 和 $last 运算符选出数组中的第一个和最后一个元素。目前(基于上述假设)您可能需要运行此管道:

OriginDestinationOption

将产生结果(来自样本数据):

// Groups the flight search response by total fare and keeps one outbound and
// one inbound OriginDestinationOption per distinct price.
// Relies on each OriginDestinationOption array holding exactly two elements
// (outbound first, inbound second), as stated in the surrounding answer.
db.flights.aggregate([
    // Flatten the nested arrays: one document per itinerary ...
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
    // ... per pricing-info entry ...
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
    // ... and per origin/destination option (outbound / inbound leg).
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },    
    {
        // Keep only the two values needed downstream, under short names.
        "$project": {
            "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
            "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
        }
    },    
    {
        // One output document per distinct price.
        // NOTE(review): no $sort precedes this stage, so $first/$last depend on
        // the order in which $unwind emitted the documents — confirm this is acceptable.
        "$group": {
            "_id": "$Price",
            "Outbound" : { "$first": "$DestinationOptions" },
            "Inbound" : { "$last": "$DestinationOptions" }
        }
    }
])

但是,对于将来的版本( MongoDB 3.2 and newer ),有两个运算符 $slice 和 $arrayElemAt 将有助于您产生所需的结果。 $slice 运算符返回数组的子集, $arrayElemAt 返回指定数组索引处的元素。

因此,您将按如下方式实施管道 -

案例1。使用 $slice 运算符:

/* 1 */
{
    "result" : [ 
        {
            "_id" : 269.56,
            "Outbound" : {
                "ElapsedTime" : 620,
                "FlightSegment" : [ 
                    {
                        "DepartureDateTime" : "2015-11-12T11:40:00",
                        "ArrivalDateTime" : "2015-11-12T17:35:00",
                        "StopQuantity" : 0,
                        "FlightNumber" : "1980",
                        "ElapsedTime" : 235,
                        "DepartureAirport" : {
                            "LocationCode" : "LHR",
                            "TerminalID" : "2",
                            "content" : ""
                        },
                        "ArrivalAirport" : {
                            "LocationCode" : "IST",
                            "TerminalID" : "I",
                            "content" : ""
                        },
                        "OperatingAirline" : {
                            "Code" : "TK",
                            "FlightNumber" : "1980",
                            "content" : ""
                        }
                    }, 
                    {
                        "DepartureDateTime" : "2015-11-12T19:35:00",
                        "ArrivalDateTime" : "2015-11-13T02:00:00",
                        "StopQuantity" : 0,
                        "FlightNumber" : "760",
                        "ResBookDesigCode" : "W",
                        "ElapsedTime" : 265,
                        "DepartureAirport" : {
                            "LocationCode" : "IST",
                            "TerminalID" : "I",
                            "content" : ""
                        },
                        "ArrivalAirport" : {
                            "LocationCode" : "DXB",
                            "TerminalID" : "1",
                            "content" : ""
                        },
                        "OperatingAirline" : {
                            "Code" : "TK",
                            "FlightNumber" : "760",
                            "content" : ""
                        },
                        "Equipment" : [ 
                            {
                                "AirEquipType" : "343",
                                "content" : ""
                            }
                        ],
                        "MarketingAirline" : {
                            "Code" : "TK",
                            "content" : ""
                        },
                        "MarriageGrp" : "I",
                        "DepartureTimeZone" : {
                            "GMTOffset" : 2
                        },
                        "ArrivalTimeZone" : {
                            "GMTOffset" : 4
                        },
                        "TPA_Extensions" : {
                            "eTicket" : {
                                "Ind" : true
                            }
                        }
                    }
                ]
            },
            "Inbound" : {
                "ElapsedTime" : 730,
                "FlightSegment" : [ 
                    {
                        "DepartureDateTime" : "2015-11-19T08:25:00",
                        "ArrivalDateTime" : "2015-11-19T11:40:00",
                        "StopQuantity" : 0,
                        "FlightNumber" : "763",
                        "ResBookDesigCode" : "W",
                        "ElapsedTime" : 315,
                        "DepartureAirport" : {
                            "LocationCode" : "DXB",
                            "TerminalID" : "1",
                            "content" : ""
                        },
                        "ArrivalAirport" : {
                            "LocationCode" : "IST",
                            "TerminalID" : "I",
                            "content" : ""
                        },
                        "OperatingAirline" : {
                            "Code" : "TK",
                            "FlightNumber" : "763",
                            "content" : ""
                        },
                        "Equipment" : [ 
                            {
                                "AirEquipType" : "330",
                                "content" : ""
                            }
                        ],
                        "MarketingAirline" : {
                            "Code" : "TK",
                            "content" : ""
                        },
                        "MarriageGrp" : "O",
                        "DepartureTimeZone" : {
                            "GMTOffset" : 4
                        },
                        "ArrivalTimeZone" : {
                            "GMTOffset" : 2
                        },
                        "TPA_Extensions" : {
                            "eTicket" : {
                                "Ind" : true
                            }
                        }
                    }, 
                    {
                        "DepartureDateTime" : "2015-11-19T14:25:00",
                        "ArrivalDateTime" : "2015-11-19T16:35:00",
                        "StopQuantity" : 0,
                        "FlightNumber" : "1971",
                        "ResBookDesigCode" : "W",
                        "ElapsedTime" : 250,
                        "DepartureAirport" : {
                            "LocationCode" : "IST",
                            "TerminalID" : "I",
                            "content" : ""
                        },
                        "ArrivalAirport" : {
                            "LocationCode" : "LHR",
                            "TerminalID" : "2",
                            "content" : ""
                        },
                        "OperatingAirline" : {
                            "Code" : "TK",
                            "FlightNumber" : "1971",
                            "content" : ""
                        },
                        "Equipment" : [ 
                            {
                                "AirEquipType" : "32B",
                                "content" : ""
                            }
                        ],
                        "MarketingAirline" : {
                            "Code" : "TK",
                            "content" : ""
                        },
                        "MarriageGrp" : "I",
                        "DepartureTimeZone" : {
                            "GMTOffset" : 2
                        },
                        "ArrivalTimeZone" : {
                            "GMTOffset" : 0
                        },
                        "TPA_Extensions" : {
                            "eTicket" : {
                                "Ind" : true
                            }
                        }
                    }
                ]
            }
        }
    ],
    "ok" : 1
}

案例2。使用 $arrayElemAt 运算符:

// $slice variant (MongoDB 3.2 and newer): groups the flight search response by
// total fare, collects every OriginDestinationOption for that fare into one
// array, then slices out the outbound and inbound legs as one-element arrays.
db.flights.aggregate([
    // Flatten the nested arrays: one document per itinerary ...
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
    // ... per pricing-info entry ...
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
    // ... and per origin/destination option (outbound / inbound leg).
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },
    {
        // Keep only the two values needed downstream, under short names.
        "$project": {
            "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
            "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
        }
    },
    {
        // One output document per distinct price, with all options pushed in
        // the order the preceding stages emitted them.
        "$group": {
            "_id": "$Price",
            "DestinationOptions" : { "$push": "$DestinationOptions" }
        }
    },
    {
        "$project": {
            // Index 0 of OriginDestinationOption is the outbound leg and index 1
            // the inbound leg, matching the $first/$last pipeline and the
            // expected output; the labels were previously swapped.
            "Outbound": { "$slice": [ "$DestinationOptions", 0, 1 ] },
            "Inbound": { "$slice": [ "$DestinationOptions", 1, 1 ] },
            "Price": "$_id",
            "_id": 0
        }
    }
])