我正在根据经度/纬度坐标填充一个很大的距离矩阵(n = 5000),想找一种比逐元素循环更快的方法。
样例代码
import pandas as pd
import numpy as np
# Calculate distance lat/long (Thanks @Jamie)
def spherical_dist(pos1, pos2, r=3958.75):
    """Return the great-circle distance between positions on a sphere.

    Parameters
    ----------
    pos1, pos2 : array_like
        Positions in degrees. The last axis is indexed as
        ``[..., 0]`` for latitude and ``[..., 1]`` for longitude; any
        leading axes are combined through NumPy broadcasting.
    r : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        The default of 3958.75 is presumably the Earth's radius in miles.

    Returns
    -------
    float or numpy.ndarray
        ``r`` times the central angle (in radians) between the positions.
    """
    # Degrees -> radians.
    pos1 = np.asarray(pos1) * np.pi / 180
    pos2 = np.asarray(pos2) * np.pi / 180

    lat1, lon1 = pos1[..., 0], pos1[..., 1]
    lat2, lon2 = pos2[..., 0], pos2[..., 1]

    cos_lat_d = np.cos(lat1 - lat2)
    cos_lon_d = np.cos(lon1 - lon2)
    cos_angle = cos_lat_d - np.cos(lat1) * np.cos(lat2) * (1 - cos_lon_d)

    # Floating-point rounding can leave the cosine marginally outside
    # [-1, 1] (e.g. for antipodal points), where arccos would return NaN.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))
# Sample locations and an empty (all-zero) distance matrix labelled by id
dat = pd.DataFrame({'id': ['a', 'b', 'c', 'd'], 'lat': [-20, -21, -22, -24], 'lon': [-100, -101, -102, -103]})
# Initialise with 0.0 rather than 0 so the frame is float from the start;
# writing float distances into an int64 frame relies on a deprecated upcast.
dist_mat = pd.DataFrame(0.0, index=dat.id, columns=dat.id)
dist_mat
# Populate every (i, j) cell; the loop bounds follow the data instead of a
# hard-coded 4.
coords = dat[['lat', 'lon']].to_numpy()
for i, pos_i in enumerate(coords):
    for j, pos_j in enumerate(coords):
        dist_mat.iloc[i, j] = spherical_dist(pos_i, pos_j)
输出
> dist_mat
id a b c d
id
a 0.000000 94.668315 189.039530 336.591787
b 94.668315 0.000000 94.373392 243.429659
c 189.039530 94.373392 0.000000 152.118003
d 336.591787 243.429659 152.118003 0.000000
最佳答案
def populate(lat_lis, lon_lis, r=3958.75):
    """Return the pairwise great-circle distance matrix for a set of points.

    Parameters
    ----------
    lat_lis, lon_lis : sequence of numbers
        Latitudes and longitudes in degrees, one entry per point and in
        the same order.
    r : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        The default of 3958.75 is presumably the Earth's radius in miles.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, n)`` whose ``[i, j]`` entry is the distance
        between point ``i`` and point ``j``.
    """
    # Column vectors of shape (n, 1), converted from degrees to radians.
    lat_col = np.array([lat_lis]).T * np.pi / 180
    lon_col = np.array([lon_lis]).T * np.pi / 180
    cos_lat = np.cos(lat_col)

    # An (n, 1) column against its (1, n) transpose broadcasts to the full
    # (n, n) grid of pairwise values, so no repeated copies are needed.
    cos_lat_d = np.cos(lat_col - lat_col.T)
    cos_lon_d = np.cos(lon_col - lon_col.T)
    cos_angle = cos_lat_d - cos_lat * cos_lat.T * (1 - cos_lon_d)

    # Floating-point rounding can leave the cosine marginally outside
    # [-1, 1] (e.g. for antipodal points), where arccos would return NaN.
    return r * np.arccos(np.clip(cos_angle, -1.0, 1.0))