参考教程:https://blog.csdn.net/hgz2020/article/details/145706720
1 数据准备
- 经过LD过滤后的VCF文件
2 利用plink运行程序
# 运行命令
/home/software/plink --vcf 12genome-2.snpcluster.gap5.LD.vcf.gz --const-fid --allow-extra-chr --genome --out genome_IBD_result --threads 30
# 结果文件:
genome_IBD_result.genome genome_IBD_result.log genome_IBD_result.nosex3 画图
#!/usr/bin/env Rscript
library(dplyr)
library(ggplot2)
setwd("E:/文件/2博士期间文件/博士课题/羊踯躅/03 群体分析-v1/06IBD")
# --- 1. 数据读取与预处理 ---
IBD <- read.table("genome_IBD_result.genome", header=TRUE, sep="")
group_info <- read.table("sample_list.txt", header=TRUE, sep="")
colnames(group_info) <- c("IID1", "group")
# 设定因子顺序
group_order <- unique(group_info$group)
group_info$group <- factor(group_info$group, levels = group_order)
# --- 2. 匹配群体信息 ---
combine1 <- inner_join(IBD, group_info, by="IID1")
group_info2 <- group_info
colnames(group_info2) <- c("IID2", "group2")
combine2 <- inner_join(combine1, group_info2, by="IID2")
# --- 3. 强制统一配对顺序并计算均值 ---
# 无论原始顺序如何,统一改为从小到大的配对
combine2_fixed <- combine2 %>%
mutate(
g1 = pmin(as.character(group), as.character(group2)),
g2 = pmax(as.character(group), as.character(group2))
)
result <- aggregate(x = combine2_fixed$PI_HAT,
by = list(combine2_fixed$g1, combine2_fixed$g2),
FUN = mean)
colnames(result) <- c("group1", "group2", "mean_PI_HAT")
# --- 4. 绘图前的过滤(关键:保留左上角) ---
# 将 group1 和 group2 转回有序因子
result$group1 <- factor(result$group1, levels = group_order)
result$group2 <- factor(result$group2, levels = group_order)
# 过滤逻辑:横轴序号 (as.numeric) 必须小于或等于 纵轴序号
# 这样数据点就会堆积在左上部
res_plot <- result[as.numeric(result$group1) <= as.numeric(result$group2), ]
# 5.1 绘制群体间热图
ggplot(res_plot, aes(x = group1, y = group2, fill = mean_PI_HAT)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "white", high = "red") +
theme_minimal() +
labs(title = "Upper-Left Triangle IBD Heatmap",
x = "Population 1",
y = "Population 2",
fill = "Mean PI_HAT") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# 保存为 PDF
ggsave("IBD_Heatmap_UpperLeft.pdf", width = 8, height = 7)
# 或者保存为高分辨率的 PNG
ggsave("IBD_Heatmap_UpperLeft.png", width = 8, height = 7, dpi = 300)
##### 5.2 绘制群体关系网络图(修改颜色)
library(igraph)
# --- 1. 准备颜色映射表 ---
# 提取所有唯一的群体名称
all_groups <- unique(group_info$group)
# 使用常用的 Set1 配色方案(RColorBrewer)
node_palette <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF")
# 如果群体数超过 8 个,可以重复颜色或换用更大的色板
group_colors <- setNames(rep(node_palette, length.out = length(all_groups)), all_groups)
# --- 2. 处理绘图数据 ---
# 过滤强亲缘关系连接
edges <- res[res$mean_PI_HAT > 0 & res$group1 != res$group2, ]
g <- graph_from_data_frame(d = edges, directed = FALSE)
# --- 3. 自动匹配节点属性 ---
# 根据节点名称(群体名)从映射表中取色
V(g)$color <- group_colors[V(g)$name]
V(g)$frame.color <- "white" # 节点边框颜色
V(g)$size <- 25 # 节点大小
# --- 4. 动态设置边属性 ---
# 粗细:基于 PI_HAT 强度,设置一个最小宽度以防线太细
E(g)$width <- pmax(E(g)$mean_PI_HAT * 350, 0.8)
# 颜色:深灰色半透明,让强连接更显著
E(g)$color <- adjustcolor("gray40", alpha.f = 0.4)
# --- 5. 保存 PNG 图片 ---
png("IBD_Network_GroupColored.png", width = 2800, height = 2800, res = 300, type = "cairo")
# 增加页边距,防止标签超出边界
par(mar = c(2, 2, 2, 2))
# 绘图
plot(g,
layout = layout_with_fr(g), # 力导向布局:关系近的群体会自动靠拢
vertex.label.cex = 0.8, # 标签字体大小
vertex.label.color = "black", # 标签颜色
vertex.label.font = 2, # 加粗
edge.curved = 0.15, # 稍微带点弧度
main = "Population Interaction Network (IBD)")
# 添加图例:方便分辨不同颜色代表的群体
legend("topleft", legend = names(group_colors), fill = group_colors,
bty = "n", cex = 0.7, title = "Populations")
dev.off()