TSNE提供了一种有效的数据降维方式，让我们可以在2维或3维的空间中展示聚类结果。
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from sklearn.manifold import TSNE
import pandas as pd
import matplotlib.pyplot as mp inputfile = 'data/consumption_data.xls'
outputfile = 'tmp/data_type3.xls'
data = pd.read_excel(inputfile, index_col='Id')
data_zs = 1.0 * (data - data.mean()) / data.std() from sklearn.cluster import KMeans
k = 3
iteration = 800
model = KMeans(n_clusters=k, n_jobs =4, max_iter=iteration) # n_jobs 并发数设置为4
model.fit(data_zs) # 开始聚类 r1 = pd.Series(model.labels_).value_counts() # 统计各个类别的数目
r2 = pd.DataFrame(model.cluster_centers_) # 找出聚类中心
r = pd.concat([data,pd.Series(model.labels_,index = data.index)],axis=1)
r.columns = list(data.columns) + [r'聚类类别'] tsne = TSNE()
tsne.fit_transform(data_zs) # 进行数据降维
tsne = pd.DataFrame(tsne.embedding_, index=data_zs.index) mp.rcParams['font.sans-serif'] = ['SimHei']
mp.rcParams['axes.unicode_minus'] = False
d = tsne[r[r'聚类类别'] == 0]
mp.plot(d[0], d[1], 'r.')
d = tsne[r[r'聚类类别'] == 1]
mp.plot(d[0], d[1], 'go')
d = tsne[r[r'聚类类别'] == 2]
mp.plot(d[0], d[1], 'b*')
mp.show()
运行结果：