PCA主成分分析 | Zhang Lab Forum

By wuzhenzhen, 31 May, 2026

Forums

主要目的为基于LDfiltered后的SNP进行PCA主成分分析。

1 数据准备

LD修剪后的SNP文件；
样品及对应的种群列表。

2 使用 plink 进行 PCA 分析

# 示例代码
plink \
--vcf ../00.filter/all.LDfilter.vcf \
--pca 10 \ # 主成分个数
--out PCA_out \
--allow-extra-chr --set-missing-var-ids @:#

# 输出文件
PCA_out.eigenval  PCA_out.eigenvec（最重要的文件）  PCA_out.log  PCA_out.nosex

3 分析结果统计

## 查看主成分占比总和
less -S PCA_out.eigenval | awk '{sum+=$1}END{print sum}'

## 查看每个主成分的占比
less -S PCA_out.eigenval | awk '{print $1/主成分占比总和}'

4 利用R画图

#!/usr/bin/env Rscript
library(ggplot2)
library(ggrepel)
library(dplyr)

setwd("E:/文件/2博士期间文件/博士课题/羊踯躅/03 群体分析-v3/02PCA")

# 传入需要的数据
input <- "PCA_out_v2.eigenvec"
x <- 1
y <- 2   # 可以修改为3

pop <- "sample_list.txt"

outpre <- "PCA_plink_plot-v2-1"


vec <- read.table(  input, header = F , row.names = 1, sep = " ")
pop <- read.table(  pop, header = F, row.names = 1, sep = "\t")

population <- pop[rownames(vec),1]

# 为了显示种群名称
pca_df <- data.frame(
  PC1 = vec[, x + 1],
  PC3 = vec[, y + 1],
  pop = population
)
centers <- pca_df %>%
  group_by(pop) %>%
  summarise(
    PC1 = mean(PC1),
    PC3 = mean(PC3)
  )


# 新增Part2
p1 <- ggplot(
  pca_df,
  aes(x = PC1, y = PC3, colour = pop)
) +
  geom_point(size = 2.5) +
  stat_ellipse() +

  ## 给 geom_text_repel 单独指定映射
  geom_text_repel(
    data = centers,
    aes(x = PC1, y = PC3, label = pop),
    inherit.aes = FALSE,  
    size = 4,
    show.legend = FALSE
  ) +

  theme_bw()

# p1<- ggplot(mapping = aes(x=vec[,x+1],y=vec[,y+1] ,colour = population ) )+ geom_point()
p1 <- p1 +  stat_ellipse() + theme_bw()
p1 <- p1 +  xlab( paste("PC", x ,"(27.8818%)", sep = "") )
# p1 <- p1 +  ylab( paste("PC", y ,"(14.9663%)", sep = "") )
p1 <- p1 +  ylab( paste("PC", y ,"(8.84354%)", sep = "") ) #pc3 8.84354


pdf(file=paste(outpre, "pc", x, y, "pdf", sep = "."), height = 8, width = 10)
p1
dev.off()

save_plot <- function(p, filename, w=10, h=8, dpi=600){
  png(filename, width=w, height=h, units="in", res=dpi)
  print(p)
  dev.off()
}

save_plot(p1, paste(outpre, "pc", x, y, "png", sep = "."))