By Liu Qirui, 20 March, 2026
Forums

FST的概念

群体间遗传分化指数(FST ):是种群分化和遗传距离的一种衡量方法,分化指数越大,差异越大。FST 居于0~1之间,越接近1表示两个群体之间分化程度越大,受选择程度越高,反之亦然。

FST = (HT-HS)/ HT
HS : 亚群体中的平均杂合度
HT : 复合群体中的平均杂合度

FST 值的取值范围是[0,1],最大值为1表明两个群体完全分化,最小值为0表明群体间无分化。
在实际的研究中FST 值为 0-0.05 时说明群体间遗传分化很小,可以不做考虑;
为0.05-0.15时,表明群体间存在中等程度的遗传分化;
为0.15-0.25时群体间存在较大的遗传分化;
为0.25以上的时候群体间就存在很大的遗传分化了。

FST 计算

FST 的计算使用了vcftools工具,这里以杜鹃花的VCF数据(必须打包为.gz格式)为例,设置窗口大小50k,步长10k(具体参数需要根据自己基因组的大小以及所需要进行的分析进行设置,如果是单个标记分析则不需要这两个参数)

#读取数据,三个物种两两vs分析
vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz

vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.agastum.txt --weir-fst-pop R.delavayi.txt --fst-window-size 50000 --fst-window-step 10000 --out R.agastum_vs_R.delavayi_filtered

#Run Time = 726.00 seconds

vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.agastum.txt --weir-fst-pop R.irroratum.txt --fst-window-size 50000 --fst-window-step 10000 --out R.agastum_vs_R.irroratum_filtered

#Run Time = 723.00 seconds

vcftools --gzvcf combine_all.pass.clear.recode.vcf.gz --weir-fst-pop R.irroratum.txt --weir-fst-pop R.delavayi.txt --fst-window-size 50000 --fst-window-step 10000 --out R.irroratum_vs_R.delavayi_filtered

#Run Time = 692.00 seconds

FST结果可视化

需要在R中读取三个.fst文件然后进行处理,结果通常可以绘制三个图:染色体曼哈顿图,用于看每个窗口/标记的具体分化情况,是最核心的结果;汇总的总FST箱形图,用来直观观察全部的FST值分布情况;分染色体的小提琴图,用于精确查看每个染色体的FST分化具体情况。

library(ggplot2)
library(dplyr)
library(tidyr)
library(ggdist)
library(ggrain)

#读取数据

target_chrs <- paste0("chr", sprintf("%02d", 1:13))
fst_MYLZ <- read.table("R.irroratum_vs_R.delavayi_filtered.windowed.weir.fst", header = T) %>%
    filter(CHROM %in% target_chrs) %>%
    mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
           Comparison = "R.irroratum vs R.delavayi")
fst_LZH <- read.table("R.agastum_vs_R.irroratum_filtered.windowed.weir.fst", header = T) %>%
    filter(CHROM %in% target_chrs) %>%
    mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
           Comparison = "R.agastum vs R.irroratum")
fst_MYH <- read.table("R.agastum_vs_R.delavayi_filtered.windowed.weir.fst", header = T) %>%
    filter(CHROM %in% target_chrs) %>%
    mutate(WEIGHTED_FST = ifelse(WEIGHTED_FST < 0, 0, WEIGHTED_FST),
           Comparison = "R.agastum vs R.delavayi")

#数据处理和基础参数准备(设置颜色)

all_data <- rbind(fst_MYH, fst_LZH, fst_MYLZ)
data_cum <- all_data %>%
  group_by(CHROM) %>%
  summarise(chr_len = max(BIN_START)) %>%
  mutate(tot = cumsum(as.numeric(chr_len)) - chr_len) %>%
  select(-chr_len) %>%
  left_join(all_data, ., by = c("CHROM" = "CHROM")) %>%
  arrange(CHROM, BIN_START) %>%
  mutate(BPcum = BIN_START + tot)
axisdf <- data_cum %>%
  group_by(CHROM) %>%
  summarize(center = (max(BPcum) + min(BPcum)) / 2)
mycol1 <- c(
  "#1F78B4", "#33A02C", "#E31A1C", "#FF7F00", "#6A3D9A",
  "#A6CEE3", "#B2DF8A", "#FB9A99", "#FDBF6F", "#CAB2D6",
  "#FFFF99", "#B15928", "#8DD3C7"                      
)

#全部窗口的曼哈顿图结果

p <- ggplot(data_cum, aes(x = BPcum, y = WEIGHTED_FST)) +
        geom_point(aes(color = CHROM), alpha = 0.6, size = 0.5) +
        scale_color_manual(values = mycol1) +
        scale_x_continuous(label = axisdf$CHROM, breaks = axisdf$center) +
        facet_wrap(~Comparison, ncol = 1) +
        theme_bw() +
        theme(
          panel.grid.major.x = element_blank(),
          panel.grid.minor.x = element_blank(),
          axis.text = element_text(size = 16),
          axis.title = element_text(size = 18),
          legend.title = element_text(size = 18),
          legend.text = element_text(size = 16),
          plot.title = element_text(hjust = 0, size = 24, face = "bold"),
          legend.position = "none",
          strip.text = element_text(size = 16, face = "bold")
          )+
        labs(title = "a.", x = "Chromosome", y = expression(italic(F)[ST]))

#汇总FST值的箱形图

long_fst <- combined_fst %>%
  pivot_longer(cols = starts_with("Fst"), names_to = "Comparison", values_to = "Fst_Value")
box <- ggplot(long_fst, aes(x = Comparison, y = Fst_Value, fill = Comparison)) +
  geom_boxplot(outlier.shape = NA) +
  coord_cartesian(ylim = c(0, 1.0)) +
  theme_bw() +
  labs(title = "b.", y = "Fst Value") +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 18),
    legend.title = element_text(size = 18),
    legend.text = element_text(size = 16),
    plot.title = element_text(hjust = 0, size = 24, face = "bold"),
    legend.position = "none"
  )

#分染色体的FST值小提琴图

data_long <- bind_rows(
  format_data(fst_MYLZ, "R.irroratum vs R.delavayi"),
  format_data(fst_LZH, "R.agastum vs R.irroratum"),
  format_data(fst_MYH, "R.agastum vs R.delavayi")
)
chrom <- ggplot(data_long, aes(x = CHROM, y = WEIGHTED_FST, fill = Comparison)) +
  geom_violin(trim = TRUE, alpha = 0.5,) +
  geom_boxplot(width = 0.1, color = "black", outlier.shape = NA, alpha = 0.25) +
  facet_wrap(~Comparison, ncol = 1) +
  scale_y_continuous(limits = c(0, 1.0), breaks = seq(0, 1, 0.2)) +
  scale_fill_manual(values = c("R.irroratum vs R.delavayi" = "#1F78B4",
   "R.agastum vs R.irroratum" = "#33A02C"
   "R.agastum vs R.delavayi" = "#E31A1C")) +
  labs(
    title = "c.",
    x = "Chromosome",
    y = expression(italic(F)[ST])
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 18),
    legend.title = element_text(size = 18),
    legend.text = element_text(size = 16),
    plot.title = element_text(hjust = 0, size = 24, face = "bold"),
    legend.position = "none",
    strip.text = element_text(size = 16, face = "bold")
  )

以上结果根据自己的需要进行拼接即可