PCA and 3D PCA
Principal component analysis (PCA) is a way to take a large, high-dimensional dataset and plot it on two or three axes. It does this without knowing which groups the samples belong to, so if you perform a PCA, plot it, and the data cluster neatly into your experimental groups, you know that those groups have distinct data signatures. In other words, your experiment worked. Here is a quick guide to PCA for RNA-seq data.
# Load data ----

# Download the example RNA-seq table; the first line of the file is read as
# the column names.
example <- read.table(
  url("https://jackrrivers.com/wp-content/uploads/2018/03/exampleRNAseq.txt"),
  header = TRUE
)

# Keep columns 2 to 13 as a matrix and leave column 1 out.
# NOTE(review): presumably column 1 holds gene identifiers and columns 2-13
# are the 12 samples -- confirm against the file.
rna.mean <- as.matrix(example[, 2:13])

# Transpose so that each of those 12 columns becomes a row; prcomp() treats
# rows as observations and columns as variables.
rna.t <- t(rna.mean)
# PCA ----

## Set colours
# One colour per row of rna.t: the first six rows in teal, the last six in
# orange. The legend below labels these colours "WT" and "KO" respectively.
col <- c(rep("#00cccc", 6), rep("#ff6633", 6))

## Initial PCA
# Variables are scaled to unit variance before the decomposition. The argument
# is spelled `scale.` in full instead of relying on partial matching of
# `scale`, and TRUE is used because `T` can be reassigned.
PCA <- prcomp(rna.t, scale. = TRUE)

# Importance table from summary(), rounded to three decimals. Row 2 is the
# one used for the axis labels below (the proportion of variance per
# component in prcomp's summary).
summary_PCA <- round(data.frame(summary(PCA)$importance), 3)
summary_PCA

## Plot PCA
# NOTE: par() changes the graphics settings of the current device and is not
# restored afterwards.
par(fig = c(0, 1, 0, 1), oma = c(5, 2, 2, 2), mar = c(6, 4, 2, 2))

# Scores on the first two components. Axis labels are left blank here and
# added by title() so they can carry the percentages.
plot(
  PCA$x[, 1:2],
  col = col,
  pch = 16,
  cex = 1.5,
  xlab = "",
  ylab = "",
  xlim = c(-250, 150),
  ylim = c(100, -100) # limits given high-to-low, which draws the y axis reversed
)
title(
  main = "PCA Genotype",
  xlab = paste0("PCA1 ", (100 * summary_PCA$PC1[2]), "%"),
  ylab = paste0("PCA2 ", (100 * summary_PCA$PC2[2]), "%")
)
legend(
  "topleft",
  bty = "n",
  legend = c("WT", "KO"),
  pch = 16,
  col = c("#00cccc", "#ff6633"),
  cex = 1,
  horiz = FALSE
)
## Log transformed PCA
# PCA on log(x + 1) of the data, this time without scaling to unit variance.
# Adding 1 keeps zero entries finite under the log. `scale.` is spelled out in
# full instead of relying on partial matching of `scale`.
PCA <- prcomp(log(rna.t + 1), scale. = FALSE)

# Importance table from summary(), rounded to three decimals. Row 2 is the
# one used for the axis labels below.
summary_PCA <- round(data.frame(summary(PCA)$importance), 3)
summary_PCA

## Plot PCA
# NOTE: par() changes the graphics settings of the current device and is not
# restored afterwards.
par(fig = c(0, 1, 0, 1), oma = c(5, 2, 2, 2), mar = c(6, 4, 2, 2))

# NOTE(review): xlim/ylim are the same hard-coded values used for the scaled
# PCA. Scores from the log-transformed, unscaled data may fall in a different
# range -- check that the points fill the plot and adjust if needed.
plot(
  PCA$x[, 1:2],
  col = col,
  pch = 16,
  cex = 1.5,
  xlab = "",
  ylab = "",
  xlim = c(-250, 150),
  ylim = c(100, -100) # limits given high-to-low, which draws the y axis reversed
)
title(
  main = "PCA Genotype",
  xlab = paste0("PCA1 ", (100 * summary_PCA$PC1[2]), "%"),
  ylab = paste0("PCA2 ", (100 * summary_PCA$PC2[2]), "%")
)
legend(
  "topleft",
  bty = "n",
  legend = c("WT", "KO"),
  pch = 16,
  col = c("#00cccc", "#ff6633"),
  cex = 1,
  horiz = FALSE
)
# 3D PCA ----

## Packages
# library() stops with an error when a package is not installed; require()
# would only return FALSE and the failure would surface later at first use.
library(pca3d)
library(magick)
library(rgl)

## Image attributes
# One entry per row of rna.t: first six samples, then last six.
# NOTE(review): shape and group are passed to pca3d() as numeric codes;
# confirm against the pca3d documentation that these are interpreted as
# intended.
shape <- c(rep(1, 6), rep(4, 6))
# This overwrites the hex colour vector `col` used by the 2D plots.
col <- c(rep(7, 6), rep(4, 6))

## Run PCA
# Same log(x + 1), unscaled PCA as the 2D log-transformed plot.
PCA <- prcomp(log(rna.t + 1), scale. = FALSE)

## Make 3D
pca3d(PCA, group = col, radius = 3, shape = shape)