01 使用CombineGVCFs合并所有样本的gVCF文件
分染色体合并:
# 1. List the sequence (chromosome) names in the Rhododendron molle reference
#    FASTA, so that each chromosome can then be processed on its own.
#    sed keeps only header lines and strips the leading '>'; awk keeps the
#    first whitespace-delimited token (the sequence ID).
sed -n 's/^>//p' Rmolle_genomic_GCA_025413875.1.fasta | awk '{ print $1 }'
# 2. Collect the name and absolute path of every gVCF file.
## Produces gvcf_list.txt: one resolved path per line, for each regular file
## under the current directory whose name ends in .g.vcf.gz.
find . -name '*.g.vcf.gz' -type f -exec realpath {} ';' >gvcf_list.txt
# 3. Merge the gVCF files one chromosome at a time (chr12 shown as the example).
#    The job runs in the background under nohup; stdout and stderr both go to
#    log_6_chr12.log.
nohup ./run_6_gvcf_combinded-chr12.sh 1>log_6_chr12.log 2>&1 &
## Contents of the script (run_6_gvcf_combinded-chr12.sh):
#!/usr/bin/env bash
# Merge every per-sample gVCF listed in ${INPUT_DIR}/gvcf_list.txt into one
# combined gVCF restricted to a single contig (CM046399.1, the chr12 example).
set -euo pipefail

INPUT_DIR="/home/Raw_data_20250821/Rmolle_callsnp_work/Rmolle_all_results/4gvcf_file_bygatk"
REF="/home/Raw_data_20250821/Rmolle_callsnp_work/Rmolle_genome_GCA025413875/Rmolle_genomic_GCA_025413875.1.fasta"
OUTPUT_DIR="/home/Raw_data_20250821/Rmolle_callsnp_work/Rmolle_all_results/5gVCF_combinded"
GATK="/home/software/gatk/gatk-4.1.8.1/gatk"
CHROM="CM046399.1"
GVCF_LIST="${INPUT_DIR}/gvcf_list.txt"

if [[ ! -r "${GVCF_LIST}" ]]; then
  printf 'error: cannot read gVCF list: %s\n' "${GVCF_LIST}" >&2
  exit 1
fi

# Build one "--variant <path>" pair per list entry. Only the first
# tab-separated field of each line is used; blank lines are skipped.
# An array keeps each path as a single argument regardless of its content.
variant_args=()
while IFS=$'\t' read -r gvcf _ || [[ -n "${gvcf}" ]]; do
  [[ -n "${gvcf}" ]] || continue
  variant_args+=(--variant "${gvcf}")
done < "${GVCF_LIST}"

if (( ${#variant_args[@]} == 0 )); then
  printf 'error: no gVCF paths found in %s\n' "${GVCF_LIST}" >&2
  exit 1
fi

mkdir -p -- "${OUTPUT_DIR}"

# Every continuation backslash must be preceded by a space; otherwise the
# argument is glued to the first word of the next line.
"${GATK}" --java-options "-Xmx60g" CombineGVCFs \
  -R "${REF}" \
  "${variant_args[@]}" \
  -L "${CHROM}" \
  -O "${OUTPUT_DIR}/Combined.12.399.g.vcf.gz"

02 使用GenotypeGVCFs进行变异鉴定
# Joint-genotype the combined gVCF of one chromosome into a raw VCF.
# -L takes the chromosome (contig) name. The explanatory note lives up here
# because a comment placed after a trailing backslash ends the line
# continuation and cuts the command in two.
gatk --java-options "-Xmx40g" GenotypeGVCFs \
  -R genome.fasta \
  -V Combined.1.g.vcf.gz \
  -L 1 \
  --output raw.chr1.vcf.gz \
  --tmp-dir ./tmp

03 将SNP和InDel变异分开
将SNP和InDel变异分开。后续分析主要使用SNP信息。
# Split the raw call set by variant type. Both commands read raw.chr1.vcf.gz
# and differ only in the selected type and the output file name.

# SNPs only -> SNP.raw.chr1.vcf.gz
gatk --java-options "-Xmx10g" SelectVariants \
  -R genome.fasta \
  --variant raw.chr1.vcf.gz \
  --select-type-to-include SNP \
  --output SNP.raw.chr1.vcf.gz

# InDels only -> indel.raw.chr1.vcf.gz
gatk --java-options "-Xmx10g" SelectVariants \
  -R genome.fasta \
  --variant raw.chr1.vcf.gz \
  --select-type-to-include INDEL \
  --output indel.raw.chr1.vcf.gz