我有一个 Azure 流分析(Azure Stream Analytics)作业,它从事件中心(Event Hub)接收事件并输出到 Blob 存储。
每个输入仅对应一个地理位置。
我还有另一个输入,是 Blob 引用数据(reference data),其中包含以 GeoJSON 定义的区域。
我需要将每个位置与这些区域进行匹配,并为每个位置只返回一行,其中有一列是所有匹配区域名称的串联。
示例:事件中心(Event Hub)输入
{"longitude": 0, "latitude": 0, "posId":1}
Blob(引用数据)
[
{
"boxName": "mediterraneanbox",
"geoJSON": "{\"geometry\":{\"type\": \"Polygon\",\"coordinates\":[[[-25.7,25.5,0.0],[-12.06,25.5,0.0],[1.58,25.5,0.0],[15.22,25.5,0.0],[28.86,25.5,0.0],[42.5,25.5,0.0],[42.5,30.04,0.0],[42.5,34.58,0.0],[42.5,39.12,0.0],[42.5,43.66,0.0],[42.5,48.2,0.0],[28.86,48.2,0.0],[15.22,48.2,0.0],[1.58,48.2,0.0],[-12.06,48.2,0.0],[-25.7,48.2,0.0],[-25.7,43.66,0.0],[-25.7,39.12,0.0],[-25.7,34.58,0.0],[-25.7,30.04,0.0],[-25.7,25.5,0.0]]]}}"
},
{
"boxName": "jordanmoroccotunisiabox",
"geoJSON": "{\"geometry\":{\"type\": \"Polygon\",\"coordinates\":[[[-19,26,0.0],[-5.6,26,0.0],[7.8,26,0.0],[21.2,26,0.0],[34.6,26,0.0],[48,26,0.0],[48,30.4,0.0],[48,34.8,0.0],[48,39.2,0.0],[48,43.6,0.0],[48,48,0.0],[34.6,48,0.0],[21.2,48,0.0],[7.8,48,0.0],[-5.6,48,0.0],[-19,48,0.0],[-19,43.6,0.0],[-19,39.2,0.0],[-19,34.8,0.0],[-19,30.4,0.0],[-19,26,0.0]]]}}"
}
]
ASA查询
-- Azure Stream Analytics query.
-- For every position report read from [input] it produces two area strings and compares them:
--   bbox        : areas obtained by joining the point against the [boundingboxes] input
--   boundingbox : areas obtained by testing the point against two hard-coded polygons
-- The final SELECT flags each row 'ERROR' when the two strings differ, otherwise 'OK'.
WITH [flattened] AS
(
    -- One row per element of input.messages whose positionReport has timeL, longitude and latitude.
    -- Id is read from the input record's metadata, not from the array element, so every element
    -- expanded from the same record carries the same Id.
    -- NOTE(review): confirm that Id is meant to be unique per record rather than per position.
    SELECT
        GetMetadataPropertyValue(input, 'EventId') AS Id,
        input.PartitionId,
        elem.arrayvalue.positionReport.longitude,
        elem.arrayvalue.positionReport.latitude,
        CreatePoint(
            TRY_CAST(elem.arrayvalue.positionReport.latitude AS FLOAT),
            TRY_CAST(elem.arrayvalue.positionReport.longitude AS FLOAT)
        ) AS point
    FROM input PARTITION BY PartitionId
    CROSS APPLY GetArrayElements(input.messages) AS elem
    WHERE
        elem.arrayvalue.positionReport.timeL IS NOT NULL
        AND elem.arrayvalue.positionReport.longitude IS NOT NULL
        AND elem.arrayvalue.positionReport.latitude IS NOT NULL
),
[bounded] AS
(
    -- One row per (point, box) pair for which ST_WITHIN returns 1.
    -- The geoJSON column of [boundingboxes] is passed through UDF.JSONparse before use.
    SELECT
        pt.id,
        bb.boxname,
        pt.PartitionId
    FROM [flattened] pt PARTITION BY PartitionId
    JOIN
    (
        SELECT
            boxname,
            UDF.JSONparse([geoJSON]) AS geoJSON
        FROM [boundingboxes]
    ) bb
        ON ST_WITHIN(pt.point, bb.geoJSON.geometry) = 1
),
[merged] AS
(
    -- Collects the [bounded] rows of each (id, PartitionId) pair within a 1-microsecond tumbling window.
    -- NOTE(review): if the intent is "rows sharing the same timestamp", a snapshot window
    -- (GROUP BY ..., System.Timestamp()) may express it more directly - confirm against the ASA docs.
    SELECT
        COLLECT() AS bbox,
        id,
        PartitionId
    FROM [bounded] PARTITION BY PartitionId
    GROUP BY
        id,
        PartitionId,
        TUMBLINGWINDOW(microsecond, 1)
),
[match] AS
(
    -- LEFT JOIN keeps every [flattened] row even when no [merged] row pairs with it;
    -- in that case mg.bbox is NULL when handed to UDF.CleanBBox.
    SELECT
        pt.id,
        pt.longitude,
        pt.latitude,
        UDF.CleanBBox(mg.bbox) AS bbox,
        CONCAT(
            CASE
                WHEN ST_WITHIN(pt.point, UDF.JsonParse('{"type": "Polygon","coordinates":[[[-25.7,25.5,0.0],[-12.06,25.5,0.0],[1.58,25.5,0.0],[15.22,25.5,0.0],[28.86,25.5,0.0],[42.5,25.5,0.0],[42.5,30.04,0.0],[42.5,34.58,0.0],[42.5,39.12,0.0],[42.5,43.66,0.0],[42.5,48.2,0.0],[28.86,48.2,0.0],[15.22,48.2,0.0],[1.58,48.2,0.0],[-12.06,48.2,0.0],[-25.7,48.2,0.0],[-25.7,43.66,0.0],[-25.7,39.12,0.0],[-25.7,34.58,0.0],[-25.7,30.04,0.0],[-25.7,25.5,0.0]]]}')) = 1
                    THEN '[mediterraneanbox]'
                ELSE ''
            END,
            CASE
                WHEN ST_WITHIN(pt.point, UDF.JsonParse('{"type": "Polygon","coordinates":[[[-19,26,0.0],[-5.6,26,0.0],[7.8,26,0.0],[21.2,26,0.0],[34.6,26,0.0],[48,26,0.0],[48,30.4,0.0],[48,34.8,0.0],[48,39.2,0.0],[48,43.6,0.0],[48,48,0.0],[34.6,48,0.0],[21.2,48,0.0],[7.8,48,0.0],[-5.6,48,0.0],[-19,48,0.0],[-19,43.6,0.0],[-19,39.2,0.0],[-19,34.8,0.0],[-19,30.4,0.0],[-19,26,0.0]]]}')) = 1
                    THEN '[jordanmoroccotunisiabox]'
                ELSE ''
            END
        ) AS boundingbox
    FROM [flattened] pt PARTITION BY PartitionId
    LEFT JOIN [merged] mg PARTITION BY PartitionId
        ON pt.PartitionId = mg.PartitionId
        AND pt.id = mg.id
        -- Time bound of the join: DATEDIFF in microseconds from pt to mg must be 0 or 1.
        AND DATEDIFF(microsecond, pt, mg) BETWEEN 0 AND 1
)
SELECT
    *,
    CASE WHEN bbox != boundingbox THEN 'ERROR' ELSE 'OK' END AS val
FROM [match]
这个查询没有给出正确的结果,因为有时我会在区域列中看到重复的记录。我以为这是由开窗引起的,所以把窗口设置为可能的最小值(microsecond,1),但这似乎不起作用。
================================================ ========================== 编辑1
看来我的问题在于用来分组的消息 ID 并不真正唯一,因此我改为使用唯一的 EventId。
我现在的问题是不得不增加 SU(流单元)。我认为这是因为查询没有进行分区;我想按 PartitionId 分区,但做不到,因为在尝试启动作业时会报错,指出分区数并不总是相同。
================================================ ========================== 编辑2
我设法在查询中按 PartitionId 进行了分区(在 flattened 步骤中添加了一个新列来传递 PartitionId),从而显著提高了读取的输入事件数量。
不过,从输出来看,这仍然不是在所有情况下都有效。我仍然会得到类似下面的结果:
{"id":"8214ab10-d62d-18cc-3ca2-91dff9718043","longitude":-9.25,"latitude":32.312666666666665,"bbox":"","boundingbox":"[mediterraneanbox][jordanmoroccotunisiabox]","val":"ERROR"}
我希望字段bbox和boundingbox具有相同的值。