vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.agastum.txt --weir-fst-pop R.delavayi.txt --fst-window-size 50000 --fst-window-step 10000 --out R.agastum_vs_R.delavayi_filtered
#Run Time = 726.00 seconds
vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.agastum.txt --weir-fst-pop R.irroratum.txt --fst-window-size 50000 --fst-window-step 10000 --out R.agastum_vs_R.irroratum_filtered
#Run Time = 723.00 seconds
vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.irroratum.txt --weir-fst-pop R.delavayi.txt --fst-window-size 50000 --fst-window-step 10000 --out R.irroratum_vs_R.delavayi_filtered
#Run Time = 692.00 seconds
FST结果可视化
需要在R中读取三个.fst文件然后进行处理,结果通常可以绘制三个图:染色体曼哈顿图,用于看每个窗口/标记的具体分化情况,是最核心的结果;汇总的总FST箱形图,用来直观观察全部的FST值分布情况;分染色体的小提琴图,用于精确查看每个染色体的FST分化具体情况。
library(ggplot2)
library(dplyr)
library(tidyr)
library(ggdist)
library(ggrain)
#读取数据
target_chrs <- paste0("chr", sprintf("%02d", 1:13))
fst_MYLZ <- read.table("R.irroratum_vs_R.delavayi_filtered.windowed.weir.fst", header = T) %>%
filter(CHROM %in% target_chrs) %>%
mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
Comparison = "R.irroratum vs R.delavayi")
fst_LZH <- read.table("R.agastum_vs_R.irroratum_filtered.windowed.weir.fst", header = T) %>%
filter(CHROM %in% target_chrs) %>%
mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
Comparison = "R.agastum vs R.irroratum")
fst_MYH <- read.table("R.agastum_vs_R.delavayi_filtered.windowed.weir.fst", header = T) %>%
filter(CHROM %in% target_chrs) %>%
mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
Comparison = "R.agastum vs R.delavayi")
#数据处理和基础参数准备(设置颜色)
all_data <- rbind(fst_MYH, fst_LZH, fst_MYLZ)
data_cum <- all_data %>%
group_by(CHROM) %>%
summarise(chr_len = max(BIN_START)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - chr_len) %>%
select(-chr_len) %>%
left_join(all_data, ., by = c("CHROM" = "CHROM")) %>%
arrange(CHROM, BIN_START) %>%
mutate(BPcum = BIN_START + tot)
axisdf <- data_cum %>%
group_by(CHROM) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
mycol1 <- c(
"#1F78B4", "#33A02C", "#E31A1C", "#FF7F00", "#6A3D9A",
"#A6CEE3", "#B2DF8A", "#FB9A99", "#FDBF6F", "#CAB2D6",
"#FFFF99", "#B15928", "#8DD3C7"
)
#全部窗口的曼哈顿图结果
p <- ggplot(data_cum, aes(x = BPcum, y = WEIGHTED_FST)) +
geom_point(aes(color = CHROM), alpha = 0.6, size = 0.5) +
scale_color_manual(values = mycol1) +
scale_x_continuous(label = axisdf$CHROM, breaks = axisdf$center) +
facet_wrap(~Comparison, ncol = 1) +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.text = element_text(size = 16),
axis.title = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16),
plot.title = element_text(hjust = 0, size = 24, face = "bold"),
legend.position = "none",
strip.text = element_text(size = 16, face = "bold")
)+
labs(title = "a.", x = "Chromosome", y = expression(italic(F)[ST]))
#汇总FST值的箱形图
long_fst <- combined_fst %>%
pivot_longer(cols = starts_with("Fst"), names_to = "Comparison", values_to = "Fst_Value")
box <- ggplot(long_fst, aes(x = Comparison, y = Fst_Value, fill = Comparison)) +
geom_boxplot(outlier.shape = NA) +
coord_cartesian(ylim = c(0, 1.0)) +
theme_bw() +
labs(title = "b.", y = "Fst Value") +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size = 16),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16),
plot.title = element_text(hjust = 0, size = 24, face = "bold"),
legend.position = "none"
)
#分染色体的FST值小提琴图
data_long <- bind_rows(
format_data(fst_MYLZ, "R.irroratum vs R.delavayi"),
format_data(fst_LZH, "R.agastum vs R.irroratum"),
format_data(fst_MYH, "R.agastum vs R.delavayi")
)
chrom <- ggplot(data_long, aes(x = CHROM, y = WEIGHTED_FST, fill = Comparison)) +
geom_violin(trim = TRUE, alpha = 0.5,) +
geom_boxplot(width = 0.1, color = "black", outlier.shape = NA, alpha = 0.25) +
facet_wrap(~Comparison, ncol = 1) +
scale_y_continuous(limits = c(0, 1.0), breaks = seq(0, 1, 0.2)) +
scale_fill_manual(values = c("R.irroratum vs R.delavayi" = "#1F78B4",
"R.agastum vs R.irroratum" = "#33A02C",
"R.agastum vs R.delavayi" = "#E31A1C")) +
labs(
title = "c.",
x = "Chromosome",
y = expression(italic(F)[ST])
) +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size = 16),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 16),
plot.title = element_text(hjust = 0, size = 24, face = "bold"),
legend.position = "none",
strip.text = element_text(size = 16, face = "bold")
)
以上结果根据自己的需要进行拼接即可