主要有三个数据
1.计算Average of gene length (bp)的shell脚本
#!/bin/bash
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <GFF3_file>"
exit 1
fi
GFF3_FILE="$1"
AVG_GENE_LENGTH=$(awk '
BEGIN {FS="\t"}
$3=="gene" {
gene_length += $5 - $4
gene_count++
}
END {
if (gene_count > 0) {
print gene_length / gene_count
} else {
print "No genes found in the GFF3 file."
}
}' "$GFF3_FILE")
echo "Average of gene length (bp): $AVG_GENE_LENGTH"
# 运行结果(脚本在92的容器RNA_snakemake里)
root@bf19e2a71b7c:/home/reference# ./avg_gene_length.sh ./Cinnamomum_japonicum.gff3
Average of gene length (bp): 7099.38
2.计算Average length per exon (bp)的shell脚本
#!/bin/bash
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <GFF3_file>"
exit 1
fi
GFF3_FILE="$1"
AVG_GENE_LENGTH=$(awk '
BEGIN {FS="\t"}
$3=="gene" {
gene_length += $5 - $4
gene_count++
}
END {
if (gene_count > 0) {
print gene_length / gene_count
} else {
print "No genes found in the GFF3 file."
}
}' "$GFF3_FILE")
echo "Average of gene length (bp): $AVG_GENE_LENGTH"
root@bf19e2a71b7c:/home/reference# cat avg_exon_length.sh
#!/bin/bash
if [ "$#" -ne 1 ]; then
echo "Usage: $0 <GFF3_file>"
exit 1
fi
GFF3_FILE="$1"
AVG_EXON_LENGTH=$(awk '
BEGIN {FS="\t"}
$3=="exon" {
exon_length += $5 - $4
exon_count++
}
END {
if (exon_count > 0) {
print exon_length / exon_count
} else {
print "No exons found in the GFF3 file."
}
}' "$GFF3_FILE")
echo "Average length per exon (bp): $AVG_EXON_LENGTH"
# 运行结果(容器同上)
root@bf19e2a71b7c:/home/reference# ./avg_exon_length.sh ./Cinnamomum_japonicum.gff3
Average length per exon (bp): 300.659
3.计算Average exons per gene的python脚本
#!/usr/bin/env python3
# 初始化变量
total_genes = 0
total_exons = 0
# 打开GFF3文件
with open('./Cinnamomum_japonicum.gff3', 'r') as gff3_file:
for line in gff3_file:
if line.startswith('#'):
continue # 跳过注释行
fields = line.strip().split('\t')
feature_type = fields[2]
# 检查是否是基因或外显子
if feature_type == 'gene':
total_genes += 1
elif feature_type == 'exon':
total_exons += 1
# 计算平均外显子数(外显子总数除以基因总数)
if total_genes > 0:
avg_exons_per_gene = total_exons / total_genes
else:
avg_exons_per_gene = 0
# 输出结果
print(f"Total number of genes: {total_genes}")
print(f"Total number of exons: {total_exons}")
print(f"Average number of exons per gene: {avg_exons_per_gene}")
# 运行结果(以天竺桂为例,脚本位置在91的msy_python容器中)
root@dc2147a07b60:/home/masiyi# python avg_exon_per_gene.py
Total number of genes: 79137
Total number of exons: 456308
Average number of exons per gene: 5.766051278163185