我有一个JSON文档,如下所示:
{
"Region": "Main",
"MarketLocations": [
{
"MarketName": "Central",
"MarketId": 1,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Southern",
"MarketId": 2,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
},
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Western",
"MarketId": 3,
"SalesCategories": {
"Produce": [
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Plum Pie",
"Price": 18.25
}
],
"RestaurantItems": [
{
"Name": "Ham Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
},
{
"Name": "Chicken Soup",
"Price": 2.25,
"PreparationTimeInMinutes": 5
}
],
"NonPerishable": [
{
"Name": "Mayo",
"Type": "Condiments"
},
{
"Name": "Syrup",
"Type": "Condiments"
},
{
"Name": "Ginger",
"Type": "Spices"
}
]
}
}
]
}
我有以下U-SQL,它处理在Visual Studio中运行的这个JSON文件:
// Location of the JSON document to read and of the CSV file to write.
DECLARE @inputPath string = @"/JsonDoc2.json";
DECLARE @outputPath string = @"Output/JsonDoc2.csv";

// NOTE(review): MultiLevelJsonExtractor is not declared in this snippet; presumably the
// assemblies that provide it are referenced elsewhere in the script - confirm.

// Reads the items found under MarketLocations[*].SalesCategories.Produce[*].
// Only the "Produce" category is addressed by this path. In the sample document
// MarketId and MarketName are properties of the enclosing market object, not of
// the Produce items themselves.
@produceRows =
    EXTRACT Name            string,
            DatePicked      DateTime,
            ShelfLifeInDays int,
            Calories        int,
            Price           decimal,
            MarketId        string,
            MarketName      string
    FROM @inputPath
    USING new MultiLevelJsonExtractor(
        "MarketLocations[*].SalesCategories.Produce[*]",
        false,
        "Name",
        "DatePicked",
        "ShelfLifeInDays",
        "Calories",
        "Price",
        "MarketId",
        "MarketName"
    );

// Write the extracted rows as CSV, with the column names as the first line.
OUTPUT @produceRows
TO @outputPath
USING Outputters.Csv(outputHeader : true);
执行时没有错误。问题在于我明确指定了想要的销售类别(“Produce”,即农产品)。我想修改这个查询,使其包含所有销售类别(Produce、BakedGoods 等),并同时输出各类别的名称。我还没有找到实现的办法。
答案 0 :(得分:1)
JsonFunctions 类(通过 Microsoft.Analytics.Samples.Formats.Json 引入,配合 Newtonsoft.Json 使用)的 JsonTuple 方法会返回一个 MAP 值,也就是一组键值对。这样,至少在结合 CROSS APPLY 和 EXPLODE 再做一些处理之后,您就可以引用其中的键,从而得到 JSON 属性/对象/数组的名称。
以您的示例为例,下面的脚本对我是有效的:
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];
USING Microsoft.Analytics.Samples.Formats.Json;
// Flattens every sales category (Produce, BakedGoods, RestaurantItems, NonPerishable, ...)
// of every market into one CSV row per item, together with the category name.
DECLARE @input string = @"/input/myinputfile.json";
DECLARE @output string = @"output/output.csv";

// Step 1: one row per market. SalesCategories is kept as a raw JSON string so that
// its property names (the category names) can be read in the next steps.
// NOTE(review): the second constructor argument (true) is presumably the extractor's
// "bypass warning" switch for columns missing at the addressed level - confirm.
@json =
    EXTRACT Region          string,
            MarketName      string,
            SalesCategories string // the SalesCategories object as JSON text
    FROM @input
    USING new MultiLevelJsonExtractor(
        "MarketLocations[*].SalesCategories",
        true,
        "Region",
        "MarketName",
        "SalesCategories"
    );

// Step 2: convert the SalesCategories JSON string into a MAP.
@categoryMaps =
    SELECT Region,
           MarketName,
           JsonFunctions.JsonTuple(SalesCategories) AS x
    FROM @json;

// Step 3: explode the MAP into one row per key-value pair:
// key is the category name, value is the JSON of that category's item array.
@categories =
    SELECT Region,
           MarketName,
           key,
           value
    FROM @categoryMaps
         CROSS APPLY
             EXPLODE(x) AS y(key, value);

// Step 4: explode each category's array into one row per item and convert the
// item's JSON into a MAP (z) so that its properties can be looked up by name.
@items =
    SELECT Region,
           MarketName,
           key,
           JsonFunctions.JsonTuple(y) AS z
    FROM @categories
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(value).Values) AS x(y);

// Step 5: name the properties to output. Not every category has every property
// (e.g. DateMade only appears on BakedGoods items in the sample), so many cells are
// expected to be empty - verify against real data.
@result =
    SELECT Region,
           MarketName,
           key AS Category,
           z["Type"] AS Type,
           z["Name"] AS Name,
           z["DatePicked"] AS DatePicked,
           z["ShelfLifeInDays"] AS ShelfLifeInDays,
           z["Calories"] AS Calories,
           z["Price"] AS Price,
           z["DateMade"] AS DateMade,
           z["PreparationTimeInMinutes"] AS PreparationTimeInMinutes
    FROM @items;

// A header row is written because the result is wide and sparse; without it the
// columns cannot be told apart. Quoting stays disabled as in the original answer.
// NOTE(review): with quoting off, values containing the delimiter may not round-trip - confirm.
OUTPUT @result
TO @output
USING Outputters.Csv(outputHeader : true, quoting : false);