By masiyi, 9 June, 2024
Forums

主要有三个数据

1.计算Average of gene length (bp)的shell脚本

#!/bin/bash  
  
if [ "$#" -ne 1 ]; then  
    echo "Usage: $0 <GFF3_file>"  
    exit 1  
fi  
  
GFF3_FILE="$1"  
  
AVG_GENE_LENGTH=$(awk '  
BEGIN {FS="\t"}  
$3=="gene" {  
    gene_length += $5 - $4  
    gene_count++  
}  
END {  
    if (gene_count > 0) {  
        print gene_length / gene_count  
    } else {  
        print "No genes found in the GFF3 file."  
    }  
}' "$GFF3_FILE")  

echo "Average of gene length (bp): $AVG_GENE_LENGTH"

# 运行结果(脚本在92的容器RNA_snakemake里)
root@bf19e2a71b7c:/home/reference# ./avg_gene_length.sh ./Cinnamomum_japonicum.gff3 
Average of gene length (bp): 7099.38

2.计算Average length per exon (bp)的shell脚本

#!/bin/bash  
  
if [ "$#" -ne 1 ]; then  
    echo "Usage: $0 <GFF3_file>"  
    exit 1  
fi  
  
GFF3_FILE="$1"  
  
AVG_GENE_LENGTH=$(awk '  
BEGIN {FS="\t"}  
$3=="gene" {  
    gene_length += $5 - $4  
    gene_count++  
}  
END {  
    if (gene_count > 0) {  
        print gene_length / gene_count  
    } else {  
        print "No genes found in the GFF3 file."  
    }  
}' "$GFF3_FILE")  

echo "Average of gene length (bp): $AVG_GENE_LENGTH"
root@bf19e2a71b7c:/home/reference# cat avg_exon_length.sh 
#!/bin/bash  
  
if [ "$#" -ne 1 ]; then  
    echo "Usage: $0 <GFF3_file>"  
    exit 1  
fi  
  
GFF3_FILE="$1"  
  
AVG_EXON_LENGTH=$(awk '  
BEGIN {FS="\t"}  
$3=="exon" {  
    exon_length += $5 - $4  
    exon_count++  
}  
END {  
    if (exon_count > 0) {  
        print exon_length / exon_count  
    } else {  
        print "No exons found in the GFF3 file."  
    }  
}' "$GFF3_FILE")  

echo "Average length per exon (bp): $AVG_EXON_LENGTH"

# 运行结果(容器同上)
root@bf19e2a71b7c:/home/reference# ./avg_exon_length.sh ./Cinnamomum_japonicum.gff3 
Average length per exon (bp): 300.659

3.计算Average exons per gene的python脚本

#!/usr/bin/env python3  
  
# 初始化变量  
total_genes = 0  
total_exons = 0  
  
# 打开GFF3文件  
with open('./Cinnamomum_japonicum.gff3', 'r') as gff3_file:  
    for line in gff3_file:  
        if line.startswith('#'):  
            continue  # 跳过注释行  
        fields = line.strip().split('\t')  
        feature_type = fields[2]  
          
        # 检查是否是基因或外显子  
        if feature_type == 'gene':  
            total_genes += 1  
        elif feature_type == 'exon':  
            total_exons += 1  
  
# 计算平均外显子数(外显子总数除以基因总数)  
if total_genes > 0:  
    avg_exons_per_gene = total_exons / total_genes  
else:  
    avg_exons_per_gene = 0  
  
# 输出结果  
print(f"Total number of genes: {total_genes}")  
print(f"Total number of exons: {total_exons}")  
print(f"Average number of exons per gene: {avg_exons_per_gene}")


# 运行结果(以天竺桂为例,脚本位置在91的msy_python容器中)
root@dc2147a07b60:/home/masiyi# python avg_exon_per_gene.py 
Total number of genes: 79137
Total number of exons: 456308
Average number of exons per gene: 5.766051278163185