我正在使用经度/纬度填充大距离矩阵(n = 5000),并且正在寻找一种更快的方法。

样例代码

import pandas as pd
import numpy as np

# Calculate distance lat/long (Thanks @Jamie)
def spherical_dist(pos1, pos2, r=3958.75):
    pos1 = np.array(pos1)
    pos2 = np.array(pos2)
    pos1 = pos1 * np.pi / 180
    pos2 = pos2 * np.pi / 180
    cos_lat1 = np.cos(pos1[..., 0])
    cos_lat2 = np.cos(pos2[..., 0])
    cos_lat_d = np.cos(pos1[..., 0] - pos2[..., 0])
    cos_lon_d = np.cos(pos1[..., 1] - pos2[..., 1])
    return r * np.arccos(cos_lat_d - cos_lat1 * cos_lat2 * (1 - cos_lon_d))

# Emtpy dataframe
dat = pd.DataFrame({'id': ['a', 'b', 'c', 'd'], 'lat': [-20, -21, -22, -24], 'lon': [-100, -101, -102, -103]})
dist_mat = pd.DataFrame(0, index=dat.id, columns=dat.id)
dist_mat

# Populate
for i in range(4):
    for j in range(4):
        dist_mat.iloc[i, j] = spherical_dist([dat.iloc[i, 1], dat.iloc[i, 2]], [dat.iloc[j, 1], dat.iloc[j, 2]])


输出量

> dist_mat

id  a   b   c   d
id
a   0.000000    94.668315   189.039530  336.591787
b   94.668315   0.000000    94.373392   243.429659
c   189.039530  94.373392   0.000000    152.118003
d   336.591787  243.429659  152.118003  0.000000

最佳答案

def populate(lat_lis, lon_lis, r=3958.75):
    lat_mtx = np.array([lat_lis]).T * np.pi / 180
    lon_mtx = np.array([lon_lis]).T * np.pi / 180

    cos_lat_i = np.cos(lat_mtx)
    cos_lat_j = np.cos(lat_mtx)
    cos_lat_J = np.repeat(cos_lat_j, len(lat_mtx), axis=1).T

    lat_Mtx = np.repeat(lat_mtx, len(lat_mtx), axis=1).T
    cos_lat_d = np.cos(lat_mtx - lat_Mtx)

    lon_Mtx = np.repeat(lon_mtx, len(lon_mtx), axis=1).T
    cos_lon_d = np.cos(lon_mtx - lon_Mtx)

    mtx = r * np.arccos(cos_lat_d - cos_lat_i*cos_lat_J*(1 - cos_lon_d))
    return mtx

08-26 21:12