# Run this script in RStudio.
library(cluster)
library(factoextra)
library(dplyr)
# Read the input table; the first CSV column is used as the row names.
mat <- read.csv(
  "C:/Users/27928/Desktop/生根率.csv",
  header = TRUE,
  row.names = 1
)
# Standardize the data (scale() centres and scales each column by default).
mat <- scale(mat)

# Elbow plot used to choose the number of clusters.
# `method` selects how the optimal number of clusters is estimated:
# "silhouette" (average silhouette width), "wss" (total within-cluster sum of
# squares) or "gap_stat" (gap statistic).
# Seed first: kmeans() picks random starting centres, so without a seed the
# curve can differ from run to run.
set.seed(123)
fviz_nbclust(mat, kmeans, method = "wss")

# Re-seed immediately before the final clustering so `km` does not depend on
# how many random numbers the elbow plot consumed.
set.seed(123)
km <- kmeans(mat, centers = 3, nstart = 25)
km$size     # number of observations in each cluster
km$cluster  # cluster label assigned to each observation
# 加载必要的包
library(ggplot2)
# Build the plotting data frame from the standardized values and the k-means
# result. `mat` is assumed to hold a single variable (one column), so each
# value lines up with exactly one cluster label.
scaled_values <- as.numeric(mat)
cluster_labels <- as.factor(km$cluster)
df <- data.frame(value = scaled_values, cluster = cluster_labels)
# Box plot of the standardized values, one box per k-means cluster.
# `fill` is mapped to `cluster` inside aes() rather than set as a constant
# vector: the colours then follow the factor levels regardless of how many
# boxes are drawn, and a legend exists for `legend.position = "top"` to place.
ggplot(df, aes(x = cluster, y = value, fill = cluster)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#E7B800", "#43CD80", "#2E9FDF")) +
  labs(
    title = "K - means with k = 3",
    x = "Cluster",
    y = "Value"
  ) +
  theme_test() +
  theme(legend.position = "top")
# Scatter plot of each standardized value against its position in `df`,
# coloured by k-means cluster.
ggplot(df, aes(x = seq_along(value), y = value, color = cluster)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("#E7B800", "#43CD80", "#2E9FDF")) +
  labs(
    title = "K - means with k = 3",
    # The x axis is the observation index (seq_along(value)), not the cluster.
    x = "Index",
    y = "Value"
  ) +
  theme_test() +
  theme(legend.position = "top")