【金山文档 | WPS云文档】 可视化绘图:不同divergence等级下与ΔR的动态变化箱线图绘制 https://www.kdocs.cn/l/ckVyRQ7VH2tX
1. 输入文件
merged.csv: 包含divergence与雄性表达解偶联(ΔR)数据。
2.核心代码
# Fill colours for the three genomic regions: CDS, DNA and Promoter.
palette = dict(CDS="#eba198", DNA="#8cc0cc", Promoter="#9CC287")
# Map the divergence column names (as used in "script 1") onto the same
# colours, so that e.g. "cds_div" shares the colour of "CDS".
colors_div = {
    f"{region.lower()}_div": colour
    for region, colour in palette.items()
}
# Plot styling.
# Apply the seaborn style first: the "ticks" style dictionary carries its own
# font.family / font.sans-serif entries, so calling set_style() after choosing
# the font would overwrite that choice.
sns.set_style("ticks")
# Embed fonts in PDF output as Type 42 (TrueType).
mpl.rcParams['pdf.fonttype'] = 42
plt.rcParams['font.family'] = 'Arial'
# Load the input table.
df = pd.read_csv(INPUT_FILE)
# Reshape to long format for the box plot: one row per input row and
# divergence column, with the column name in 'Divergence_Type' and its value
# in 'Divergence_Value'.
df_long = pd.melt(
    df,
    id_vars=['gene1', 'gene2', 'delta_raw'],
    value_vars=['cds_div', 'dna_div', 'promoter_div'],
    var_name='Divergence_Type',
    value_name='Divergence_Value',
)
# Assign divergence classes. The last class is labelled '> 0.4', so its upper
# edge is left open: with a finite edge of 1.0, pd.cut would return NaN for
# any value above 1.0 and those rows would be lost in the later dropna().
# Values below 0 still fall outside every bin and become NaN.
bins = [0, 0.2, 0.4, float("inf")]
q_labels = ['0 - 0.2', '0.2 - 0.4', '> 0.4']
df_long['group'] = pd.cut(
    df_long['Divergence_Value'],
    bins=bins,
    labels=q_labels,
    include_lowest=True,  # a value of exactly 0 belongs to the first class
)
# Replace the raw column names with display names used for hue and legend.
type_map = {"cds_div": "CDS", "dna_div": "DNA", "promoter_div": "Promoter"}
df_long['Divergence_Type'] = df_long['Divergence_Type'].map(type_map)
hue_order = ["CDS", "DNA", "Promoter"]
# Box plot overlaid on jittered points.
fig1, ax1 = plt.subplots(figsize=(12, 7))
# Keep only complete rows. dropna() checks every column, so rows without a
# divergence class (NaN 'group') are removed here as well.
plot_df = df_long.dropna().copy()
# Individual observations, drawn with a lower zorder than the boxes and
# without their own legend entries.
sns.stripplot(
    data=plot_df,
    x='group', y='delta_raw', hue='Divergence_Type', hue_order=hue_order,
    ax=ax1, palette=palette, dodge=True, jitter=0.2, alpha=0.4, s=3, legend=False, zorder=1
)
# Boxes without outlier markers; the strip plot already shows every point.
sns.boxplot(
    data=plot_df,
    x='group', y='delta_raw', hue='Divergence_Type', hue_order=hue_order,
    ax=ax1, notch=False, palette=palette, width=0.7, showfliers=False, linewidth=1.5, zorder=2
)
# Make the box fills semi-transparent: keep each patch's RGB and set alpha
# to 0.5.
# NOTE(review): assumes the boxes are exposed through ax1.patches, which
# depends on the seaborn version - confirm.
for patch in ax1.patches:
    red, green, blue, _ = patch.get_facecolor()
    patch.set_facecolor((red, green, blue, 0.5))
n_groups = len(q_labels)
n_hues = len(hue_order)
# x position of every box centre, ordered group-major / hue-minor. Each group
# sits at integer position i and its hue boxes are spread evenly over a span
# of 0.7, the `width` passed to sns.boxplot. These positions are reused for
# the median labels here and for tick placement further down.
tick_locs = [
    i + (j - (n_hues - 1) / 2) * (0.7 / n_hues)
    for i in range(n_groups)
    for j in range(n_hues)
]
# (class label, region) pairs in the same order as tick_locs.
cells = [(label, region) for label in q_labels for region in hue_order]
# Write the median of delta_raw just above the median line of each box.
for pos, (label, region) in zip(tick_locs, cells):
    in_cell = (plot_df['group'] == label) & (plot_df['Divergence_Type'] == region)
    val = plot_df.loc[in_cell, 'delta_raw'].median()
    # An empty cell has a NaN median; leave it unlabelled.
    if not np.isnan(val):
        ax1.text(pos, val + 0.002, f'{val:.3f}', ha='center', va='bottom',
                 fontsize=9, fontweight='bold', color='black')
# Upper y-limit: the 98th percentile of delta_raw plus 15 % headroom.
# Values above this limit fall outside the visible range.
y_data_max = plot_df['delta_raw'].quantile(0.98)
y_limit = y_data_max * 1.15
ax1.set_ylim(bottom=-0.02, top=y_limit)
# Put minor x ticks at the centre of every box so that the vertical grid
# lines pass through the middle of the boxes.
ax1.set_xticks(tick_locs, minor=True)
# Light dashed grid on both axes, for major and minor ticks alike.
grid_style = dict(color='lightgrey', linestyle='--', linewidth=0.5, alpha=0.7, zorder=0)
ax1.grid(axis='both', which='both', **grid_style)
# Sample sizes along the top edge: a twin x axis that shares ax1's x range
# and carries one tick per box, labelled with the number of rows in that box.
ax_top = ax1.twiny()
ax_top.set_xlim(ax1.get_xlim())
# Row count per (class, region) cell, in the same group-major / hue-minor
# order that tick_locs was built in.
tick_labels = [
    str(((plot_df['group'] == label) & (plot_df['Divergence_Type'] == region)).sum())
    for label in q_labels
    for region in hue_order
]
ax_top.set_xticks(tick_locs)
ax_top.set_xticklabels(tick_labels, fontsize=9)
ax_top.tick_params(axis='x', direction='out', length=5, width=1.2, pad=5)
# Title for the top axis, centred just above it (axes coordinates).
ax_top.text(0.5, 1.05, 'Sample Size (n)', transform=ax_top.transAxes,
            fontsize=12, fontweight='bold', va='bottom', ha='center')
# Axis titles share one font style.
axis_label_style = dict(fontsize=12, fontweight='bold')
ax1.set_xlabel("Divergence Class", **axis_label_style)
ax1.set_ylabel(r"$\Delta$ ($R_f$ - $R_m$)", **axis_label_style)
# Legend for the hue levels (the strip plot was drawn with legend=False).
ax1.legend(title="Genomic Region", loc='upper left')
# Keep the top and right spines so the plot is framed on all four sides.
sns.despine(ax=ax1, top=False, right=False)
plt.tight_layout()
# Raster export at 300 dpi.
png_path = OUTPUT_DIR / "Divergence_Boxplots_Comparison.png"
plt.savefig(png_path, dpi=300)
plt.savefig(OUTPUT_DIR / "Divergence_Boxplots_Comparison.pdf", transparent=True)
3. 输出文件
Divergence_Boxplots_Comparison.pdf:箱线图(包含抖动点),透明背景
Divergence_Boxplots_Comparison.png:同一张图的 300 dpi 位图版本