import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
# IPython/Jupyter magic (not plain Python): render figures inline in the notebook.
%matplotlib inline
# Use SimHei as the sans-serif font -- presumably so Chinese text in plot
# labels renders correctly; confirm the font is installed on the target machine.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
# Load the raw table; the CSV has no header row, so columns are numbered from 0.
data = pd.read_csv('./010-data_multivar.csv', header=None)
# Split the table: every column except the last becomes the feature matrix,
# the last column becomes dataset_y.
# NOTE(review): the slice bounds were missing in the source text; -1 is assumed
# because the plotting code below requires exactly two feature columns.
dataset_X, dataset_y = data.iloc[:, :-1], data.iloc[:, -1]
# Work with plain NumPy arrays from here on.
dataset_X = dataset_X.values
dataset_y = dataset_y.values

无标签数据集可视化,将第一列feature作为X,第二列feature作为y

def visual_2D_dataset_dist(dataset):
    """Show a two-feature dataset as a scatter plot.

    Column 0 is drawn on the x-axis and column 1 on the y-axis. The axis
    limits are padded by one unit beyond the data range on every side.

    Args:
        dataset: 2-D array whose second dimension has length 2.

    Raises:
        AssertionError: if ``dataset`` does not have exactly two columns.
    """
    assert dataset.shape[1] == 2, 'only support dataset with 2 features'
    plt.figure()
    X = dataset[:, 0]
    Y = dataset[:, 1]
    plt.scatter(X, Y, marker='v', c='g', label='dataset')
    # NOTE(review): the margin literals were missing in the source text; 1 is
    # assumed to match visual_kmeans_effect -- confirm.
    X_min, X_max = np.min(X) - 1, np.max(X) + 1
    Y_min, Y_max = np.min(Y) - 1, np.max(Y) + 1
    plt.title('dataset distribution')
    plt.xlim(X_min, X_max)
    plt.ylim(Y_min, Y_max)
    plt.xlabel('feature_0')
    plt.ylabel('feature_1')
    plt.legend()


visual_2D_dataset_dist(dataset_X)
机器学习-kmeans的使用-LMLPHP

构造 kmeans

from sklearn.cluster import KMeans
# Configure the estimator: k-means++ seeding, 4 clusters, and 5 independent
# initialisations.
kmeans = KMeans(
    n_clusters=4,
    n_init=5,
    init='k-means++',
)
# Fit the model on the feature matrix only.
kmeans.fit(dataset_X)

机器学习-kmeans的使用-LMLPHP

将dataset_X聚类效果可视化

def visual_kmeans_effect(k_means, dataset):
    """Plot the cluster regions, data points and centroids of a fitted model.

    A dense grid (step 0.01) covering the data range plus a one-unit margin
    is labelled with ``k_means.predict`` and drawn as a coloured background.
    The samples and the cluster centres are then scattered on top.

    Args:
        k_means: fitted estimator exposing ``predict`` and
            ``cluster_centers_``.
        dataset: 2-D array whose second dimension has length 2.

    Raises:
        AssertionError: if ``dataset`` does not have exactly two columns.
    """
    assert dataset.shape[1] == 2, 'only support dataset with 2 features'
    X = dataset[:, 0]
    Y = dataset[:, 1]
    X_min, X_max = np.min(X) - 1, np.max(X) + 1
    Y_min, Y_max = np.min(Y) - 1, np.max(Y) + 1

    # meshgrid builds the coordinate matrices of the grid points.
    X_values, Y_values = np.meshgrid(np.arange(X_min, X_max, 0.01),
                                     np.arange(Y_min, Y_max, 0.01))

    # Predict a label for every grid point, then restore the grid shape so
    # the labels can be drawn as an image.
    predict_labels = k_means.predict(np.c_[X_values.ravel(), Y_values.ravel()])
    predict_labels = predict_labels.reshape(X_values.shape)

    plt.figure()
    plt.imshow(predict_labels, interpolation='nearest',
               extent=(X_values.min(), X_values.max(),
                       Y_values.min(), Y_values.max()),
               cmap=plt.cm.Paired,
               aspect='auto',
               origin='lower')

    # Draw the samples on top of the region image.
    plt.scatter(X, Y, marker='v', facecolors='none', edgecolors='k', s=30)

    # Draw the cluster centres; zorder keeps them above the samples.
    centroids = k_means.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='o',
                s=100, linewidths=2, color='k', zorder=5, facecolors='b')

    plt.title('K-Means effect graph')
    plt.xlim(X_min, X_max)
    plt.ylim(Y_min, Y_max)
    plt.xlabel('feature_0')
    plt.ylabel('feature_1')
    plt.show()


visual_kmeans_effect(kmeans, dataset_X)

机器学习-kmeans的使用-LMLPHP

 

# Iris clustering

from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

datairis = load_iris()
# Use only the feature matrix; the labels are deliberately left out.
dataset = datairis.data
# NOTE(review): the numeric arguments were missing in the source text.
# n_clusters=3 is assumed because the iris data set has three species, and
# n_init=10 is assumed as scikit-learn's long-standing default -- confirm both.
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)
kmeans.fit(dataset)
# Cluster index assigned to each sample.
print(kmeans.labels_)
# Dump the whole loaded data set object for inspection.
print(datairis)
机器学习-kmeans的使用-LMLPHP
 
05-06 12:33