应用 K-means 聚类之后,我计算了数据集中各点之间的欧几里得距离,但无法找出与最小距离对应的点。我的代码如下:

def ClusterIndicesComp(clustNum, labels_array):
    """Return the positions in *labels_array* whose label equals *clustNum*.

    Args:
        clustNum: Cluster label to look for.
        labels_array: Iterable of per-sample cluster labels.

    Returns:
        A 1-D integer NumPy array of matching positions, in ascending order.
        It is empty when no label matches.
    """
    # An explicit integer dtype keeps the result usable as an index array even
    # when nothing matches; a bare np.array([]) would be float64, which NumPy
    # refuses to accept as an index.
    return np.array(
        [i for i, x in enumerate(labels_array) if x == clustNum],
        dtype=np.intp,
    )

def newsol(max_gen, population, data):
    """Print, for each single-point cluster, its nearest point(s) in every larger cluster.

    For each of ``max_gen`` rounds the population is clustered with
    ``Kmeans_clu``. Every cluster holding exactly one sample is then compared
    against every cluster holding more than two samples, and the Euclidean
    distance from the lone sample to each member of the larger cluster is
    printed. Finally the member(s) at the smallest distance are printed
    together with that distance.

    Args:
        max_gen: Number of clustering rounds to run.
        population: Passed unchanged to ``Kmeans_clu``.
        data: Feature container indexed with the integer arrays returned by
            ``ClusterIndicesComp``.
            NOTE(review): presumably a NumPy array with one row per sample -
            confirm against the caller.

    Returns:
        None. Results are only printed.
    """
    for i in range(max_gen):
        cluster1 = 5
        # NOTE(review): K1 is not defined in this function; presumably a
        # module-level list - confirm.
        K1.insert(i, cluster1)
        print('value of K1', K1)
        u, label, t, l = Kmeans_clu(cluster1, population)
        k2 = Counter(l.labels_)
        # Count number of elements in each cluster.
        print("Before addition values are\n", k2)
        # Clusters that contain exactly one point.
        singles = [c for (c, size) in k2.items() if size == 1]
        # Clusters to compare against. The threshold is "> 2", so clusters of
        # exactly two points are skipped.
        # NOTE(review): the original comment said "more than one point" -
        # confirm which threshold is intended.
        larger = [c for (c, size) in k2.items() if size > 2]
        # Iterate through the clusters that have a single point associated.
        for single in np.array(singles):
            print("Value in NEW_SOL is of 1 length cluster\n", single)
            # Extract the features of that lone point.
            plot1 = data[ClusterIndicesComp(single, l.labels_)]
            print("Values are in sol of plot1", plot1)
            for big in larger:
                print("Value in NEW_SOL is of more than 2 length cluster\n", big)
                # Extract the features of the larger cluster.
                plot2 = data[ClusterIndicesComp(big, l.labels_)]
                # Now compute the Euclidean distance between plot1 and every
                # point plotk of plot2.
                Slist = []
                for plotk in plot2:
                    S = np.linalg.norm(np.array(plot1) - np.array(plotk))
                    print("Distance between plot1 and plotk is", S)
                    Slist.append(S)
                # Take the minimum once, after all distances are known.
                Smin = min(Slist)
                # Report the point(s) that actually produced the minimum,
                # not whichever point the loop happened to visit last.
                # Several points may tie.
                for idx, S in enumerate(Slist):
                    if S == Smin:
                        print("VAlues of Slist with min  \n", plot2[idx], Smin)

最佳答案

我尝试了下面的解决方案,看起来可以正常工作。我认为可能有多个索引同时对应最小的欧几里得距离,因此把这些索引全部记录下来。

import numpy as np


def min_distance_indexes(point, candidates):
    """Find which rows of *candidates* lie closest to *point*.

    Args:
        point: Coordinates of the reference point.
        candidates: Non-empty sequence of points, each with as many
            coordinates as *point*.

    Returns:
        A ``(distances, indexes, smallest)`` tuple: the Euclidean distance
        from *point* to every candidate, the list of candidate indexes whose
        distance equals the minimum (more than one on a tie), and that
        minimum distance as a float.

    Raises:
        ValueError: If *candidates* is empty.
    """
    rows = np.asarray(candidates, dtype=float)
    if rows.size == 0:
        raise ValueError("candidates must contain at least one point")
    distances = np.linalg.norm(rows - np.asarray(point, dtype=float), axis=1)
    # Compare against the true minimum of all distances. Comparing each
    # distance against only the first one reports the wrong point whenever
    # a later distance is smaller than the first but not the smallest.
    smallest = distances.min()
    return distances, np.flatnonzero(distances == smallest).tolist(), float(smallest)


plot1 = [1.0, 2.0, 3.0]
plot2 = [(1.0, 4.0, 5.0),
         (4.0, 7.0, 90.0),
         (1.0, 4.0, 5.0),
         (-1.0, -4.0, -5.0)]

distances, indexes, Smin = min_distance_indexes(plot1, plot2)

for S in distances:
    # Euclidean distance from plot1 to one element of plot2.
    print("Distance between plot1 and plotk is %f" % S)

print('indexes:')
print(indexes)

# Every index listed here is at the minimum distance, so ties are all shown.
for i in indexes:
    print("VAlues of Slist with min  \n", i, plot2[i], Smin)


结果如下:

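Distance between plot1 and plotk is 2.828427
Distance between plot1 and plotk is 87.195183
Distance between plot1 and plotk is 2.828427
Distance between plot1 and plotk is 10.198039
indexes:
[0, 2]
VAlues of Slist with min
 0 (1.0, 4.0, 5.0) 2.8284271247461903
VAlues of Slist with min
 2 (1.0, 4.0, 5.0) 2.8284271247461903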

10-08 19:47