from scipy.stats import fisher_exact
# Per-chromosome enrichment of positively selected genes.
#
# For every chromosome a 2x2 contingency table is built (this chromosome vs.
# all other chromosomes, selected vs. not selected) and a one-sided Fisher
# exact test asks whether selected genes are over-represented on it.

# Raw data: chromosome name -> (positively selected genes, total genes).
data = {
    'chr01': (133, 2561),
    'chr02': (148, 2365),
    'chr03': (130, 2389),
    'chr04': (135, 2468),
    'chr05': (182, 2863),
    'chr06': (250, 4500),
    'chr07': (162, 2692),
    'chr08': (177, 3139),
    'chr09': (161, 2633),
    'chr10': (177, 3195),
    'chr11': (142, 3631),
    'ptg': (40, 620),
}

# Genome-wide totals are derived from the table rather than hard-coded, so
# the "all other chromosomes" row of each contingency table always agrees
# with the per-chromosome counts above (a hand-typed total that drifts from
# the table silently skews every p-value).
total_selected = sum(selected for selected, _ in data.values())
total_genes = sum(total for _, total in data.values())

# Collected rows: (chromosome, selected, total, selected fraction, p-value).
results = []
for chr_name, (selected, total) in data.items():
    non_selected = total - selected
    other_selected = total_selected - selected
    other_non_selected = (total_genes - total) - other_selected
    # Contingency table: rows = this chromosome / every other chromosome,
    # columns = selected / not selected.
    table = [[selected, non_selected],
             [other_selected, other_non_selected]]
    # One-sided Fisher exact test (alternative='greater' tests enrichment).
    # The odds ratio is not reported, so it is discarded.
    _, p_value = fisher_exact(table, alternative='greater')
    results.append((chr_name, selected, total, selected / total, p_value))

# Most significant chromosome first.
# NOTE(review): p-values are reported as-is; no multiple-testing correction
# is applied across the chromosomes -- confirm whether that is intended.
results.sort(key=lambda row: row[-1])

# Print one line per chromosome.
for chr_name, selected, total, fraction, p_value in results:
    print(f"{chr_name}: 正选择基因 {selected}/{total} ({fraction:.2%}), p值={p_value:.4g}")