# R 的 cluster 包中包含多种聚类算法,下面基于某个数据集,对三种聚类算法进行评估
# ============================
# 评估聚类 #
# ============================ # 引入fpc包(cluster.stats)
library(fpc)
# 引入包库(clara、fanny)
library(cluster) #=====调用聚类算法======================================================= # 确定簇心个数
# Number of clusters requested from each algorithm.
cluster_num <- 3

# Load the observations to cluster; the first row of data.csv holds the
# column names.
# NOTE(review): every column is handed to the clustering routines below, so
# the file is presumably all-numeric -- confirm against the data set.
data <- read.csv("data.csv", header = TRUE)

# k-means (stats package).
# NOTE(review): when `centers` is a number the starting centers are drawn at
# random, so the partition can differ between runs unless a seed is set
# beforehand.
km <- kmeans(data, centers = cluster_num)

# CLARA from the cluster package.
cl <- clara(data, k = cluster_num)

# Fuzzy clustering via fanny() from the cluster package.
fan <- fanny(data, k = cluster_num)
# Evaluate each partition with cluster.stats() from the fpc package.
# The pairwise distance matrix is identical for all three calls, so it is
# computed once and reused instead of being rebuilt per algorithm.
data_dist <- dist(data)

# kmeans() stores the cluster assignment in `cluster`.
km_stats <- cluster.stats(data_dist, km$cluster)

# clara() stores the assignment in `clustering`; the full component name is
# spelled out so the lookup does not rely on `$` partial matching.
cl_stats <- cluster.stats(data_dist, cl$clustering)

# fanny() exposes the nearest crisp assignment in `clustering`.
fcm_stats <- cluster.stats(data_dist, fan$clustering)
# Collect the evaluation statistics into one table, one row per algorithm.
# Columns: average silhouette width, average within-cluster distance,
# average between-cluster distance, and the Calinski-Harabasz index.
info <- data.frame(
  Algorithm = c("KMeans", "Clara", "FCM"),
  Silwidth = c(
    km_stats$avg.silwidth,
    cl_stats$avg.silwidth,
    fcm_stats$avg.silwidth
  ),
  AverageWithin = c(
    km_stats$average.within,
    cl_stats$average.within,
    fcm_stats$average.within
  ),
  averageBetween = c(
    km_stats$average.between,
    cl_stats$average.between,
    fcm_stats$average.between
  ),
  ch = c(km_stats$ch, cl_stats$ch, fcm_stats$ch)
)

# Replace the working column names with the display labels. The fourth
# column holds average.between, i.e. a distance, so its label reads
# "簇间平均距离" (the previous label "簇间平均聚类" was a typo).
names(info)[2:5] <- c(
  "轮廓系数",
  "簇内平均距离",
  "簇间平均距离",
  "Calinski和Harabasz指数"
)