我有一张带有拾取和降落纬度/长数据的大表。这个表有几十万个记录,我想找到每个拾取和下降之间的距离。
可以在BigQuery中使用SQL来完成吗?
答案 0 :(得分:4)
尝试下面,应该是你的好开始
SELECT
orderid,
car_number,
ROUND(distance) AS distance,
ROUND(next_distance) AS next_distance
FROM JS(
(
// input table
SELECT
orderid,
car_number,
pickup_lon,
pickup_lat,
dropoff_lon,
dropoff_lat,
LEAD(pickup_lon) OVER(PARTITION BY car_number ORDER BY orderid) AS next_pickup_lon,
LEAD(pickup_lat) OVER(PARTITION BY car_number ORDER BY orderid) AS next_pickup_lat
FROM
(SELECT 1 AS orderid, 1 AS car_number, -121.23200000000001 AS pickup_lon, 38.1964 AS pickup_lat, -117.48 AS dropoff_lon, 34.5894 AS dropoff_lat ),
(SELECT 2 AS orderid, 1 AS car_number, -118.76 AS pickup_lon, 34.1445 AS pickup_lat, -122.26 AS dropoff_lon, 37.7606 AS dropoff_lat),
(SELECT 3 AS orderid, 2 AS car_number, -117.736 AS pickup_lon, 33.5761 AS pickup_lat, -117.19333333333333 AS dropoff_lon, 34.47484444444444 AS dropoff_lat)
) ,
// input columns
orderid, car_number, pickup_lon, pickup_lat, dropoff_lon, dropoff_lat, next_pickup_lon, next_pickup_lat,
// output schema
"[{name: 'orderid', type: 'integer'},
{name: 'car_number', type: 'integer'},
{name: 'distance', type: 'float'},
{name: 'next_distance', type: 'float'}]",
// function
"function(r, emit){
emit({
orderid: r.orderid, car_number: r.car_number,
distance: dist(r.pickup_lon, r.pickup_lat, r.dropoff_lon, r.dropoff_lat),
next_distance: dist(r.dropoff_lon, r.dropoff_lat, r.next_pickup_lon, r.next_pickup_lat)
});
function deg2rad(deg) {
return deg * (Math.PI/180)
}
function dist(pickup_lon, pickup_lat, dropoff_lon, dropoff_lat) {
var R = 3959; // Radius of the earth in miles
var dLat = deg2rad(dropoff_lat-pickup_lat);
var dLon = deg2rad(dropoff_lon-pickup_lon);
var a = Math.sin(dLat/2) * Math.sin(dLat/2) +
Math.cos(deg2rad(pickup_lat)) * Math.cos(deg2rad(dropoff_lat)) *
Math.sin(dLon/2) * Math.sin(dLon/2);
var c = 2 * R * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));
return c;
}
}"
)
结果是:
orderid car_number distance next_distance
1 1 325.0 79.0
2 1 317.0 NaN
3 2 69.0 NaN
来自评论:我的表架构是这样的:
car_number (string)
pu_datetime (timestamp)
do_datetime (timestamp)
pu_lat (float)
pu_long (float)
do_lat (float)
do_long (float)
已添加以匹配您的表架构
SELECT
pu_datetime,
car_number,
ROUND(distance) AS distance,
ROUND(next_distance) AS next_distance
FROM JS(
(
// input table
SELECT
pu_datetime,
car_number,
pu_lon,
pu_lat,
do_lon,
do_lat,
LEAD(pu_lon) OVER(PARTITION BY car_number ORDER BY pu_datetime) AS next_pu_lon,
LEAD(pu_lat) OVER(PARTITION BY car_number ORDER BY pu_datetime) AS next_pu_lat
FROM
(SELECT timestamp('2016-07-01 13:00:00') AS pu_datetime, '1' AS car_number, -121.23200000000001 AS pu_lon, 38.1964 AS pu_lat, -117.48 AS do_lon, 34.5894 AS do_lat ),
(SELECT timestamp('2016-07-02 10:00:00') AS pu_datetime, '1' AS car_number, -118.76 AS pu_lon, 34.1445 AS pu_lat, -122.26 AS do_lon, 37.7606 AS do_lat),
(SELECT timestamp('2016-07-03 11:00:00') AS pu_datetime, '2' AS car_number, -117.736 AS pu_lon, 33.5761 AS pu_lat, -117.19333333333333 AS do_lon, 34.47484444444444 AS do_lat)
) ,
// input columns
pu_datetime, car_number, pu_lon, pu_lat, do_lon, do_lat, next_pu_lon, next_pu_lat,
// output schema
"[{name: 'pu_datetime', type: 'timestamp'},
{name: 'car_number', type: 'string'},
{name: 'distance', type: 'float'},
{name: 'next_distance', type: 'float'}]",
// function
"function(r, emit){
emit({
pu_datetime: r.pu_datetime, car_number: r.car_number,
distance: dist(r.pu_lon, r.pu_lat, r.do_lon, r.do_lat),
next_distance: dist(r.do_lon, r.do_lat, r.next_pu_lon, r.next_pu_lat)
});
function deg2rad(deg) {
return deg * (Math.PI/180)
}
function dist(pu_lon, pu_lat, do_lon, do_lat) {
var R = 3959; // Radius of the earth in miles
var dLat = deg2rad(do_lat-pu_lat);
var dLon = deg2rad(do_lon-pu_lon);
var a = Math.sin(dLat/2) * Math.sin(dLat/2) +
Math.cos(deg2rad(pu_lat)) * Math.cos(deg2rad(do_lat)) *
Math.sin(dLon/2) * Math.sin(dLon/2);
var c = 2 * R * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));
return c;
}
}"
)
答案 1 :(得分:0)
此功能将计算两点之间的里程。这不是驾驶距离。你必须使用谷歌API,我相信每天有2500次点击。
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE Function [dbo].[udf-Geo-CalcMiles] (@Long1 float,@Lat1 float,@Long2 float,@Lat2 Float)
Returns Float as
Begin
Declare @Miles Float
Set @Miles = 0
Set @Miles = (Sin(Radians(@Lat1)) * Sin(Radians(@Lat2))) + (Cos(Radians(@Lat1)) * Cos(Radians(@Lat2)) * Cos(Radians(@Long2) - Radians(@Long1)))
Return Case When @Miles =0 then 0 else abs((3958.75 * Atan(Sqrt(1 - power(@Miles, 2)) / @Miles))) end
End
所以例如
;with cteBase as (
Select Top 10
Box_Nr
,From_Lat=Box_Lat
,From_Lng=Box_Lng
,To_Lat = Lead(Box_Lat,1) over (Order By Box_Nr)
,To_Lng = Lead(Box_Lng,1) over (Order By Box_Nr)
from [dbo].[USPS-Collection-Point] Order by Box_Nr
)
Select *
,Distance = [dbo].[udf-Geo-CalcMiles] (From_Lng,From_Lat,To_Lng,To_Lat)
From cteBase
返回
Box_Nr From_Lat From_Lng To_Lat To_Lng Distance
1 41.6947535 -71.1394048 41.6980289 -71.1341529 0.353027299635122
2 41.6980289 -71.1341529 41.682761 -71.124812 1.15978643547294
3 41.682761 -71.124812 41.7051854 -71.1477813 1.95061187457874
4 41.7051854 -71.1477813 41.7080851 -71.1441519 0.274205428604983
5 41.7080851 -71.1441519 41.702242 -71.1282279 0.915266219941071
6 41.702242 -71.1282279 41.711085 -71.142123 0.941821767229312
7 41.711085 -71.142123 41.7055301 -71.1516977 0.625473329587972
8 41.7055301 -71.1516977 41.7071749 -71.1513423 0.115113681673717
9 41.7071749 -71.1513423 41.7079604 -71.1543306 0.163416766676813
10 41.7079604 -71.1543306 41.667808 -71.154372 2.77425950023261