利用genehapR包对GWAS候选基因的单倍型分析

By Tingting, 23 May, 2026

Forums

首先，这个R包使用起来比较方便，但是安装较难。

一、软件安装

#conda下载4.4.3的R
#安装步骤
install.packages("BiocManager")
BiocManager::install(c("Biostrings", "GenomicRanges", "muscle", "IRanges", "rtracklayer", "trackViewer"))
install.packages("geneHapR")
#如果出现报错，则用conda安装依赖包，安装完依赖包，使用以下命令进行安装
#安装genehapR
devtools::install_git("https://gitee.com/zhangrenl/genehapr")

PS: genehapR也有镜像，但是下载不了

二、分析基因的单倍型

## 导入基因型vcf数据，导入候选基因的vcf文件即可
library(geneHapR)
vcf = import_vcf("phenotype17_chr1_176693484_176784176.vcf")
# 单倍型分型
hapResult <- vcf2hap(vcf)
write.csv(hapResult,"p17_chr1_176693484_176784176-hapresult.csv")
#批量分析的脚本
#!/usr/bin/env Rscript

# 批量单倍型分析脚本
# 用法：在包含 phenotype*.vcf 文件的目录下运行 Rscript batch_haplotype.R

library(geneHapR)

# 获取所有符合模式的VCF文件
vcf_files <- list.files(pattern = "^phenotype.*\\.vcf$")
# 循环处理每个文件
for (vcf_file in vcf_files) {
    cat("正在处理:", vcf_file, "\n")
    # 构造输出文件名：将 "phenotype" 替换为 "p"，并将 ".vcf" 替换为 "-hapresult.csv"
    out_file <- gsub("^phenotype", "p", vcf_file)
    out_file <- gsub("\\.vcf$", "-hapresult.csv", out_file)
    # 如果结果文件已存在，可选择跳过（避免重复运行覆盖）
    if (file.exists(out_file)) {
        cat("  结果文件已存在，跳过:", out_file, "\n\n")
        next
    }
    
    # 尝试导入VCF并分型
    result <- tryCatch({
        vcf <- import_vcf(vcf_file)
        hapResult <- vcf2hap(vcf)
        hapResult
    }, error = function(e) {
        cat("  处理失败:", conditionMessage(e), "\n")
        return(NULL)
    })
    
    if (!is.null(result)) {
        write.csv(result, out_file, row.names = FALSE)
        cat("  成功保存:", out_file, "\n\n")
    } else {
        cat("  跳过该文件。\n\n")
    }
}
cat("批量处理完成。\n")
#重测序文件路径：/home/147_Tg_reseq/142_ped_final_gwas_result/142reseq_LDblock_vcf_result