By Tingting, 19 May, 2025
Forums

使用 RStudio

library(cluster)
library(factoextra)
library(dplyr)
# Read the input table (the file name suggests rooting-rate data); the first
# CSV column is used as row names.
mat <- read.csv(
  "C:/Users/27928/Desktop/生根率.csv",
  header = TRUE,
  row.names = 1
)

# Standardise every column (centre on the mean, divide by the standard
# deviation) so that no variable dominates the Euclidean distances.
mat <- scale(mat)

# Seed the RNG before the elbow plot as well: kmeans() starts from random
# centres, so without a seed the curve can differ from run to run.
set.seed(123)

# Elbow plot of the total within-cluster sum of squares (method = "wss").
# fviz_nbclust() also accepts "silhouette" (average silhouette width) and
# "gap_stat" (gap statistic) for estimating the number of clusters.
fviz_nbclust(mat, kmeans, method = "wss")

# Re-seed so the final clustering does not depend on how many random draws
# the elbow plot consumed.
set.seed(123)

# k = 3 is hard-coded here; presumably it was read off the elbow plot.
# nstart = 25 keeps the best of 25 random initialisations.
km <- kmeans(mat, centers = 3, nstart = 25)
km$size     # number of observations in each cluster
km$cluster  # cluster label of every row

# 加载必要的包
library(ggplot2)

# Combine the standardised values and their cluster labels into one data
# frame for plotting. `mat` is expected to hold a single variable and `km`
# the k-means result computed from it.
# Guard against a stale `km` that was fitted on a different `mat`.
stopifnot(nrow(mat) == length(km$cluster))

# as.numeric() flattens `mat` column by column. With more than one column the
# values of different variables end up pooled in `value`, so say so instead
# of relying on silent recycling.
if (ncol(mat) > 1L) {
  warning(
    "mat has ", ncol(mat), " columns; their values are pooled in the plots.",
    call. = FALSE
  )
}

df <- data.frame(
  value = as.numeric(mat),
  # Repeat the labels once per column so each value keeps its row's cluster.
  cluster = factor(rep(km$cluster, times = ncol(mat)))
)

# Box plot of the standardised values, one box per cluster.
# The fill colour is mapped to `cluster` and the palette is supplied through
# scale_fill_manual(), so colours follow the factor levels, a legend is drawn
# (which legend.position then places at the top), and the plot does not break
# when the number of boxes differs from the number of colours given to a
# fixed `fill` vector.
ggplot(df, aes(x = cluster, y = value, fill = cluster)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#E7B800", "#43CD80", "#2E9FDF")) +
  labs(
    title = "K - means with k = 3",
    x = "Cluster",
    y = "Value"
  ) +
  theme_test() +
  theme(legend.position = "top")

# Scatter plot of every value against its position in `df`, coloured by
# cluster. The x axis is the running observation index, not the cluster, so
# it is labelled accordingly.
ggplot(df, aes(x = seq_along(value), y = value, color = cluster)) +
  geom_point(size = 2) +
  scale_color_manual(values = c("#E7B800", "#43CD80", "#2E9FDF")) +
  labs(
    title = "K - means with k = 3",
    x = "Observation index",
    y = "Value"
  ) +
  theme_test() +
  theme(legend.position = "top")