超几何分布检验 | Zhang Lab Forum

By Gengxin, 30 May, 2026

Forums

超几何分布检验用于检验你感兴趣的基因集合中，落在某条染色体上的基因数量，是否显著多于随机水平。

# 输入你的数字
N <- 28000    # 全基因组总基因数（香榧大约这个量级）
K <- 1800     # 某条染色体总基因数
n <- 120      # 你的目标基因集大小
k <- 25       # 你的基因中落在这条染色体的数量

# 超几何检验
p_value <- phyper(k - 1, K, N - K, n, lower.tail = FALSE)
p_value

进阶版

library(ggplot2)
library(dplyr)

# ===================== 数据 =====================
chr_data <- data.frame(
  Chr = paste0("Chr", 1:11),
  Observed = c(257,258,236,253,268,421,257,305,277,333,274),
# Observed = c(,,,,,,,,,,),
  Total_genes = c(3873,3354,3509,3692,4313,6526,3988,4566,3872,4717,3893)
)

N_total_genome <- 46303
n_query <- 3139

# ===================== 计算 =====================
chr_data <- chr_data %>%
  mutate(
    Expected = n_query * Total_genes / N_total_genome,
    Fold = Observed / Expected,
    P_value = phyper(Observed - 1, Total_genes, N_total_genome - Total_genes, n_query, lower.tail = FALSE),
    sig = case_when(
      P_value < 0.001 ~ "***",
      P_value < 0.01  ~ "**",
      P_value < 0.05  ~ "*",
      TRUE ~ ""
    )
  )

# 染色体按 1-11 顺序
chr_data$Chr <- factor(chr_data$Chr, levels = paste0("Chr", 1:11))

# ===================== ✅ 导出结果表格（Excel可打开）=====================
write.csv(
  chr_data,
  file = "E:/博士课题/性别偏向基因的鉴定/Mfuzz/染色体富集结果/cluster12_染色体富集结果.csv",
  row.names = FALSE
)

# ===================== 绘图 =====================
ggplot(chr_data, aes(x = Chr, y = Fold)) +
  geom_col(aes(fill = -log10(P_value)), width = 0.7) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray30") +
  geom_text(aes(label = sig), size = 6, fontface = "bold", vjust = -0.8) +
  
  scale_fill_gradient(low = "#D5D5D5", high = "#F3C8B7") +
  
  # 顶部自动留空，保证星号显示
  scale_y_continuous(breaks = seq(0, 2.0, 0.1), expand = expansion(mult = c(0, 0.15))) +
  
  labs(x = "Chromosome", y = "Enrichment fold", fill = "-log10(P)") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# 导出高清图片
ggsave("E:/博士课题/性别偏向基因的鉴定/Mfuzz/染色体富集结果/cluster12_chromosome_enrichment.pdf", width = 10, height = 6, dpi = 600)