我正在使用经度/纬度填充大距离矩阵(n = 5000),并且正在寻找一种更快的方法。
示例代码
import pandas as pd
import numpy as np
# Calculate distance lat/long (Thanks @Jamie)
def spherical_dist(pos1, pos2, r=3958.75):
    """Return the great-circle distance between latitude/longitude positions.

    Args:
        pos1: A ``(lat, lon)`` pair in degrees, or an array whose last axis
            holds ``(lat, lon)`` pairs.
        pos2: Same layout as ``pos1``. The leading axes of both inputs are
            broadcast against each other.
        r: Radius of the sphere. The result is expressed in the same unit
            (the default is approximately the Earth's radius in miles).

    Returns:
        The distance along the sphere surface: a NumPy scalar for two single
        positions, otherwise an array shaped by broadcasting.
    """
    pos1 = np.radians(np.asarray(pos1, dtype=float))
    pos2 = np.radians(np.asarray(pos2, dtype=float))
    cos_lat1 = np.cos(pos1[..., 0])
    cos_lat2 = np.cos(pos2[..., 0])
    cos_lat_d = np.cos(pos1[..., 0] - pos2[..., 0])
    cos_lon_d = np.cos(pos1[..., 1] - pos2[..., 1])
    cos_angle = cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d)
    # Rounding can leave the cosine marginally outside [-1, 1] (e.g. for
    # near-antipodal points); arccos would then return NaN, so clamp first.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))
# Sample coordinates and an all-zero distance matrix labelled by point id.
dat = pd.DataFrame({'id': ['a', 'b', 'c', 'd'], 'lat': [-20, -21, -22, -24], 'lon': [-100, -101, -102, -103]})
# Initialise with a float so the float distances assigned below match the
# column dtype (an integer 0 would create int64 columns).
dist_mat = pd.DataFrame(0.0, index=dat.id, columns=dat.id)
dist_mat
# Populate every (i, j) cell; the loop bounds follow the number of rows in
# `dat` instead of a hard-coded 4.
coords = dat[['lat', 'lon']].to_numpy()
n_points = len(coords)
for i in range(n_points):
    for j in range(n_points):
        dist_mat.iloc[i, j] = spherical_dist(coords[i], coords[j])
输出
> dist_mat
id a b c d
id
a 0.000000 94.668315 189.039530 336.591787
b 94.668315 0.000000 94.373392 243.429659
c 189.039530 94.373392 0.000000 152.118003
d 336.591787 243.429659 152.118003 0.000000
答案 0 :(得分:1)
def populate(lat_lis, lon_lis, r=3958.75):
    """Build the full pairwise great-circle distance matrix in one shot.

    Args:
        lat_lis: 1-D sequence of latitudes in degrees.
        lon_lis: 1-D sequence of longitudes in degrees, same length as
            ``lat_lis``.
        r: Radius of the sphere. The result is expressed in the same unit.

    Returns:
        An ``(n, n)`` array whose ``[i, j]`` entry is the distance between
        point ``i`` and point ``j``.
    """
    lat = np.radians(np.asarray(lat_lis, dtype=float))
    lon = np.radians(np.asarray(lon_lis, dtype=float))
    cos_lat = np.cos(lat)
    # A column vector against a row vector broadcasts to every (i, j) pair,
    # so no repeated n-by-n copies of the inputs are needed.
    cos_lat_d = np.cos(lat[:, None] - lat[None, :])
    cos_lon_d = np.cos(lon[:, None] - lon[None, :])
    cos_angle = cos_lat_d - cos_lat[:, None] * cos_lat[None, :] * (1 - cos_lon_d)
    # Rounding can leave the cosine marginally outside [-1, 1]; arccos would
    # then return NaN, so clamp first.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))
答案 1 :(得分:1)
另一种解决方案:使用 geopy 库进行距离计算:
from geopy.distance import lonlat, distance, great_circle
df = pd.DataFrame({'id': ['a', 'b', 'c', 'd'], 'lat': [-20, -21, -22, -24], 'lon': [-100, -101, -102, -103]})

# Pair every origin with every destination by joining the frame to itself on
# a constant helper column, then discard that column.
keyed = df.assign(key=0)
df = keyed.merge(keyed, on='key', suffixes=('', '_x')).drop('key', axis=1)


def _geodesic_miles(row):
    """Return the geopy distance in miles between a row's two points."""
    origin = lonlat(row['lon'], row['lat'])
    destination = lonlat(row['lon_x'], row['lat_x'])
    return distance(origin, destination).miles


# One distance per origin/destination pair.
df['Miles'] = df.apply(_geodesic_miles, axis=1)

# Reshape the long pair list into an id-by-id table.
df = df.groupby(['id', 'id_x'])['Miles'].max().unstack()
print(df)
输出:距离按 geodesic(测地线)度量计算(默认情况下 distance() 等同于 geodesic()):
id_x a b c d
id
a 0.000000 94.516982 188.743084 335.820435
b 94.516982 0.000000 94.228293 242.812242
c 188.743084 94.228293 0.000000 151.653020
d 335.820435 242.812242 151.653020 0.000000
按 great_circle(大圆)度量计算得出的距离(这似乎与您的方法一致):
def _great_circle_miles(row):
    """Return the great-circle distance in miles between a row's two points."""
    origin = lonlat(row['lon'], row['lat'])
    destination = lonlat(row['lon_x'], row['lat_x'])
    return great_circle(origin, destination).miles


df['Miles'] = df.apply(_great_circle_miles, axis=1)
id_x a b c d
id
a 0.000000 94.668589 189.040078 336.592761
b 94.668589 0.000000 94.373665 243.430364
c 189.040078 94.373665 0.000000 152.118443
d 336.592761 243.430364 152.118443 0.000000
如需将结果从英里改为公里,只需将 distance() 或 great_circle() 返回结果的属性从 .miles 改为 .km。