判断正选择基因在各染色体上的分布是否显著

By ruiyuan Li, 31 October, 2025

Forums

对每条染色体构造 2×2 列联表，用 Fisher 精确检验（单尾）判断正选择基因是否在该染色体上富集，代码如下：

from scipy.stats import fisher_exact

# Raw data: chromosome -> (positively selected genes, total genes) on that
# chromosome / scaffold group.
data = {
    'chr01': (133, 2561),
    'chr02': (148, 2365),
    'chr03': (130, 2389),
    'chr04': (135, 2468),
    'chr05': (182, 2863),
    'chr06': (250, 4500),
    'chr07': (162, 2692),
    'chr08': (177, 3139),
    'chr09': (161, 2633),
    'chr10': (177, 3195),
    'chr11': (142, 3631),
    'ptg': (40, 620),
}

# Genome-wide totals are derived from the table so that the margins of every
# 2x2 contingency table agree with the per-chromosome counts. The previously
# hard-coded total_selected (1873) did not equal the sum of the selected
# counts (1837), which put 36 phantom selected genes into the "other
# chromosomes" row of every test; total_genes already matched its sum (33056).
total_selected = sum(n_selected for n_selected, _ in data.values())
total_genes = sum(n_total for _, n_total in data.values())

# Per-chromosome results as tuples:
# (chromosome, selected, total, selected fraction, one-sided p-value)
results = []
for chr_name, (selected, total) in data.items():
    non_selected = total - selected
    other_selected = total_selected - selected
    other_non_selected = (total_genes - total) - other_selected

    # 2x2 contingency table:
    #                       selected         not selected
    #   this chromosome     selected         non_selected
    #   all others          other_selected   other_non_selected
    table = [
        [selected, non_selected],
        [other_selected, other_non_selected],
    ]

    # One-sided Fisher exact test: is this chromosome enriched for
    # positively selected genes relative to the rest of the genome?
    _, p_value = fisher_exact(table, alternative='greater')
    results.append((chr_name, selected, total, selected / total, p_value))

# Sort by p-value, most significant first.
# NOTE: these are raw p-values from one test per entry in `data`; no
# multiple-testing correction is applied here.
results.sort(key=lambda row: row[-1])

# Print one summary line per chromosome.
for name, n_sel, n_tot, fraction, p in results:
    print(f"{name}: 正选择基因 {n_sel}/{n_tot} ({fraction:.2%}), p值={p:.4g}")