我正在实现K均值聚类算法。到目前为止,这就是我所拥有的:
import copy
import csv
import math
import random
import sys
class Centroid():
def __init__(self, coordinates, _id):
self.id = _id
self.coordinates = coordinates
self.elements = []
def __repr__(self):
return 'Centroid: ' + str(self.id)
@property
def count(self):
return len(self.elements)
def recalculate_coordinates(self):
x = [sum(y)/len(y) for y in zip(*self.elements)]
self.coordinates = x
def reset_elements(self):
self.previous_elements = []
for el in self.elements:
self.previous_elements.append(el)
self.elements = []
class Kmeans():
def __init__(self):
self.k = int(sys.argv[2])
self.prepare_data()
self.iterations = 0
def prepare_data(self):
filename = sys.argv[1]
self.dataset = []
with open(filename, 'rb') as csvfile:
reader = csv.reader(csvfile, delimiter=' ')
for row in reader:
tuplified = tuple(map(float, row))
self.dataset.append(tuplified)
self.create_centroids()
def create_centroids(self):
self.centroids = []
for i in xrange(self.k):
chosen = random.choice(self.dataset)
cent = Centroid(chosen, i+1)
self.centroids.append(cent)
def main():
k = Kmeans()
def iterate(k):
k.iterations += 1
for item in k.dataset:
candidates = []
for centroid in k.centroids:
z = zip(item, centroid.coordinates)
squares = map(lambda x: (x[0]-x[1])**2, z)
added = sum(squares)
edistance = math.sqrt(added)
candidates.append((centroid, edistance))
winner = min(candidates, key=lambda x: x[1])
winner[0].add_element(item)
for centroid in k.centroids:
centroid.reset_elements()
centroid.recalculate_coordinates()
status_list = []
for centroid in k.centroids:
boole = sorted(centroid.elements) == sorted(centroid.previous_elements)
status_list.append(boole)
if False in status_list:
iterate(k)
print k.centroids
print k.iterations
iterate(k)
if __name__ == '__main__':
main()
但是,我一直收到错误
RuntimeError: maximum recursion depth exceeded in cmp
。我尝试了几次重构,均未成功。谁能告诉我可能是什么问题。先感谢您。 最佳答案
如果错误在这一行:
boole = sorted(centroid.elements) == sorted(centroid.previous_elements)
最有可能发生的是,您在
centroids.elements
和centroids.previous_elements
中都有循环引用,因此比较操作(在sorted
调用和==
中均执行)继续循环遍历每个列表。此行为的简单演示(在Python 3中):
>>> x = []
>>> y = [x]
>>> x.append(y)
>>> x == y
Traceback (most recent call last)
....
x == y
RecursionError: maximum recursion depth exceeded in comparison