PCA and 3D PCA

Principal component analysis (PCA) is a way to take a large amount of data and plot it on two or three axes. It does this without knowing which groups the data belong to, so if you perform a PCA, plot it, and the data cluster nicely into the experimental groups, you know there are distinct data signatures in those groups. In other words, your experiment worked. Here is a quick guide to PCA for RNA-seq data.

# Load data

# Read the example table from its URL; the first line holds the column names.
example <- read.table(
  url("https://jackrrivers.com/wp-content/uploads/2018/03/exampleRNAseq.txt"),
  header = TRUE
)
# Keep columns 2-13 as a numeric matrix
# (presumably the 12 sample columns -- confirm against the file).
rna.mean <- as.matrix(example[, 2:13])
# Transpose so the selected columns become rows, i.e. the observations
# that prcomp() works on.
rna.t <- t(rna.mean)
# PCA
## Set colours
# One colour per row of rna.t: the first six rows teal, the last six orange.
col <- c(rep("#00cccc", 6), rep("#ff6633", 6))

## Initial PCA
# prcomp()'s argument is named `scale.`; spell it out instead of relying on
# partial matching, and use TRUE rather than the reassignable `T`.
PCA <- prcomp(rna.t, scale. = TRUE)
# Importance table (standard deviation, proportion of variance, cumulative
# proportion per component), rounded to three decimal places.
summary_PCA <- round(data.frame(summary(PCA)$importance), 3)
summary_PCA

## Plot PCA
par(fig = c(0, 1, 0, 1), oma = c(5, 2, 2, 2), mar = c(6, 4, 2, 2))
# Scatter of the first two principal components. Axis labels are left blank
# here and added by title() below. ylim is given high-to-low, which draws the
# second axis reversed.
plot(
  PCA$x[, 1:2],
  col = col, pch = 16, cex = 1.5,
  xlab = "", ylab = "",
  xlim = c(-250, 150), ylim = c(100, -100)
)
# Row 2 of the importance table is the proportion of variance; show it as a
# percentage in each axis label.
title(
  main = "PCA Genotype",
  xlab = paste0("PCA1 ", (100 * summary_PCA$PC1[2]), "%"),
  ylab = paste0("PCA2 ", (100 * summary_PCA$PC2[2]), "%")
)
legend(
  "topleft",
  bty = "n", legend = c("WT", "KO"),
  pch = 16, col = c("#00cccc", "#ff6633"), cex = 1, horiz = FALSE
)

Rplot02

## Log-transformed PCA
# log(x + 1) keeps zero values finite. No unit-variance scaling is applied
# this time; `scale.` is spelled out in full rather than partially matched.
PCA <- prcomp(log(rna.t + 1), scale. = FALSE)

# Importance table for the log-transformed PCA, rounded to three decimals.
summary_PCA <- round(data.frame(summary(PCA)$importance), 3)
summary_PCA

## Plot PCA
# The calls below were previously collapsed onto the comment line above, so
# they never ran and the leftover argument lines were a syntax error.
par(fig = c(0, 1, 0, 1), oma = c(5, 2, 2, 2), mar = c(6, 4, 2, 2))
# NOTE(review): these axis limits are identical to the ones used for the
# scaled PCA plot -- confirm they suit the log-transformed scores.
plot(
  PCA$x[, 1:2],
  col = col, pch = 16, cex = 1.5,
  xlab = "", ylab = "",
  xlim = c(-250, 150), ylim = c(100, -100)
)
# Row 2 of the importance table is the proportion of variance; show it as a
# percentage in each axis label.
title(
  main = "PCA Genotype",
  xlab = paste0("PCA1 ", (100 * summary_PCA$PC1[2]), "%"),
  ylab = paste0("PCA2 ", (100 * summary_PCA$PC2[2]), "%")
)
legend(
  "topleft",
  bty = "n", legend = c("WT", "KO"),
  pch = 16, col = c("#00cccc", "#ff6633"), cex = 1, horiz = FALSE
)

Rplot03

# 3D PCA
## Packages
# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, letting the script fail later and less clearly.
library(pca3d)
library(magick)
library(rgl)

## Image attributes
# One shape code per row of rna.t: 1 for the first six rows, 4 for the last six.
shape <- c(rep(1, 6), rep(4, 6))
# Despite its name, `col` is passed to pca3d() as the grouping vector:
# 7 for the first six rows, 4 for the last six.
col <- c(rep(7, 6), rep(4, 6))

## Run PCA
# Log-transformed, unscaled PCA; `scale.` is spelled out in full rather than
# partially matched.
PCA <- prcomp(log(rna.t + 1), scale. = FALSE)

## Make 3D
pca3d(PCA, group = col, radius = 3, shape = shape)

PCA 3D

Leave a Reply

Your email address will not be published. Required fields are marked *

R Code

Previous article

ANOVAs One-way and two-way