全部使用R语言
1、主成分分析
使用的数据:transcript_count_matrix.csv(本例为 cx_ye_transcript_count_matrix.csv)以及样本分组文件 group.csv
# Principal component analysis of a count matrix, drawn as a PC1/PC2 scatter
# plot coloured by sample group.

# Load the required packages
library(factoextra)
library(ggplot2)

# Read the count matrix; the first CSV column supplies the row names.
# After the transpose below rows are treated as samples, so the CSV columns
# are expected to be samples.
diff <- read.csv(
  "C:/Users/27928/Desktop/香榧根腐病转录组/6、主成分分析/count数据/cx_ye_transcript_count_matrix.csv",
  header = TRUE, row.names = 1, sep = ","
)

# Transpose so that each row is one sample and each column one feature
diff_trans <- as.data.frame(t(diff))

# Drop constant columns (including all-zero ones): they have zero variance
# and cannot be scaled to unit variance by prcomp().
is_informative <- vapply(
  diff_trans,
  function(x) !all(x == x[1]) && !all(x == 0),
  logical(1)
)
diff_trans_filtered <- diff_trans[, is_informative]

# Run the PCA on centred, unit-variance features.
# The argument is spelled `scale.`; `scale` only worked via partial matching.
pca_result <- prcomp(diff_trans_filtered, scale. = TRUE)
pca_scores <- as.data.frame(pca_result$x)
pca_scores$Sample <- rownames(pca_scores)

# Read the group table and attach the group of every sample.
# The two columns of group.csv are renamed to Group and Sample, in that order.
group <- read.csv(
  "C:/Users/27928/Desktop/香榧根腐病转录组/6、主成分分析/count数据/group.csv",
  header = TRUE, sep = ","
)
colnames(group) <- c("Group", "Sample")

# Look the groups up with match() instead of merge(): merge() sorts its result
# by the key and drops unmatched rows, which would leave the Group vector out
# of step with the individuals stored in pca_result.
group_idx <- match(pca_scores$Sample, group$Sample)
missing_samples <- pca_scores$Sample[is.na(group_idx)]
if (length(missing_samples) > 0) {
  stop(
    "Samples without a group in group.csv: ",
    paste(missing_samples, collapse = ", "),
    call. = FALSE
  )
}
pca_scores$Group <- group$Group[group_idx]

# Percentage of variance explained by the first two components
# (row 2 of the importance table is "Proportion of Variance").
summary_pca <- summary(pca_result)
pc1_var <- round(summary_pca$importance[2, 1] * 100, 2) # PC1 contribution
pc2_var <- round(summary_pca$importance[2, 2] * 100, 2) # PC2 contribution

# Draw the individuals plot with fviz_pca_ind and set the axis labels directly
pca_plot <- fviz_pca_ind(
  pca_result,
  col.ind = as.factor(pca_scores$Group), # same order as rows of pca_result$x
  addEllipses = TRUE,
  legend.title = "Groups",
  ellipse.type = "confidence",
  ellipse.level = 0.9,
  palette = c("#CC3333", "#339999"),     # one colour per group
  xlab = paste0("PC1 (", pc1_var, "%)"), # x-axis label
  ylab = paste0("PC2 (", pc2_var, "%)")  # y-axis label
) +
  theme(
    panel.border = element_rect(
      fill = NA, color = "black", linewidth = 1, linetype = "solid"
    )
  )

# Print explicitly so the figure is also drawn when the script is source()d
print(pca_plot)
2、相关性分析
使用的数据:gene_fpkm_matrix.csv(本例为 c_gen_gene_fpkm_matrix.csv)
# Correlation heatmap between the columns of an FPKM matrix.

# Load the required packages. pheatmap is installed only when it is missing,
# so re-running the script does not reinstall it (or need network access).
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)     # heatmap drawing
library(RColorBrewer) # colour palettes

# Read the CSV data: header = TRUE takes the first line as column names,
# row.names = 1 takes the first column as row names.
data <- read.csv(
  "C:/Users/27928/Desktop/香榧根腐病转录组/7、WGCNA/转录组数据gene.fpkm/c_gen_gene_fpkm_matrix.csv",
  header = TRUE, row.names = 1, sep = ","
)

# Correlation matrix between the columns of `data`.
# Pearson is used here; `method` can be switched to "spearman" or "kendall".
cor_matrix <- cor(data, method = "pearson")

# Draw the heatmap
pheatmap(
  cor_matrix,
  # Colour ramp; other palettes such as Blues, Reds or YlOrRd also work
  color = colorRampPalette(brewer.pal(9, "Greens"))(100),
  clustering_distance_rows = "euclidean", # distance used to cluster rows
  clustering_distance_cols = "euclidean", # distance used to cluster columns
  clustering_method = "complete",         # hierarchical clustering linkage
  display_numbers = FALSE                 # do not print the coefficients
)
3、韦恩图分析
参考:https://mp.weixin.qq.com/s/xGJr826D1OAYcZd2zgATLw?from=kdocs_link
# Venn diagram of three differential-gene lists.

# Load the required packages
library(GOplot)
library(VennDiagram)

# Each table: first column is the differential gene name, second column is
# 1 for up-regulated and -1 for down-regulated.
group1 <- read.csv("C:/Users/27928/Desktop/香榧根腐病转录组/9、韦恩图/C_GEN_CK_FO.csv")
group2 <- read.csv("C:/Users/27928/Desktop/香榧根腐病转录组/9、韦恩图/C_JING_CK_FO.csv")
group3 <- read.csv("C:/Users/27928/Desktop/香榧根腐病转录组/9、韦恩图/C_YE_CK_FO.csv")

# plot = FALSE makes GOVenn return a list instead of only the diagram; per
# the GOplot documentation it holds the plot (`venn$plot`) and the overlap
# lists (`venn$table`).
venn <- GOVenn(
  group1, group2, group3,                          # the three gene lists
  label = c('Root', 'Stem', 'Leaf'),               # names of the three sets
  circle.col = c('#074B90', '#93DCFC', '#3FD2C7'), # circle colours
  # Colours for up-regulated, down-regulated and oppositely regulated genes
  lfc.col = c('#CED1F0', '#C0DEE2', '#E5C8BC'),
  plot = FALSE
)

# The returned list is not drawn automatically, so render the diagram here.
# Inspect venn$table for the genes in each intersection.
print(venn$plot)