# Intro to multivariate statistics
# Marco Plebani, 28 November 2018

rm(list=ls()) # its always good practice to clear R’s memory

#######
# PCA
#######
# Click here for a very good, interactive explanation of the idea behind PCA:
# http://setosa.io/ev/principal-component-analysis/

# Open the “iris” dataset:
data(iris)
#?iris # gives info about the dataset
str(iris)
head(iris)
plot(iris)
# by plotting the variables against each other it becomes obvious
# that some are strongly correlated: in other words, there is an
# overlap in the power of some variables at explaining/accounting for
# the data variability. A PCA will help disentangling these correlations.

# functions prcomp() performs PCA:
fit<-prcomp(iris[-5], scale=TRUE)
# scale=T standardizes the variables to the same relative scale,
# avoiding some variables to become dominant just because of their
# large measurement units.

summary(fit)
# the summary indicates that four PCs where created: the number
# of possible PCs always equals the number of original variables.

# PC1 and PC2 explain respectively ~73% and ~23% of the data's
# total variability, summing up to a more-than-respectable 96% of
# the total variability. There is no fixed rule about this, but
# this already tells us that all the other PCs can be ignored as
# they explain only crumbs of the total data variability.

plot(fit,type="lines")
# a "scree plot" allows a graphical assessment of the relative
# contribution of the PCs in explaining the variability of the data.

fit[2] # the "Rotation" matrix contains the "loadings" of each
# of the original variables on the newly created PCs.
# The concept of eigenvalue would require to be introduced for
# understanding how the loadings are estimated, and in general
# for a quantitative understanding of how the principal
# components are calculated: the interested reader might look
# it up in the references at this link: http://marcoplebani.com/pca/
# https://stats.stackexchange.com/questions/2691/making-sense-of-principal-component-analysis-eigenvectors-eigenvalues

biplot(fit)
# the arrows provide a graphical rendition of the
# loadings of each of the original variables over the used PCs.

# Package GGPLOT2 and its derivative GGBIPLOT have a somehow esoteric
# syntax, but they produce much fancier plots:

library(ggplot2) # this might need to be installed

# Variances of principal components
variances <- data.frame(variances=fit$sdev**2, pcomp=1:length(fit$sdev))
# **2 means ^2

#Plot of variances
varPlot <- ggplot(variances, aes(pcomp, variances)) +
geom_bar(stat="identity", fill="gray") + geom_line()
varPlot

# run these two lines only if ggbiplot is not installed yet:
#library(devtools)
#install_github("ggbiplot", "vqv")
# load ggbiplot
library(ggbiplot)

Species<-iris$Species
iris_pca <- ggbiplot(fit,obs.scale = 1,
         var.scale=1,groups=Species,ellipse=F,circle=F,varname.size=3)
# iris_pca <- iris_pca + theme_bw() # try this for a less
# ink-demanding background
iris_pca


#######
# nMDS
#######

colnames(iris)
i <- iris[,c(1:4)]
i.sp<-iris[,c(5)]
library(vegan)
i.dist <- vegdist(i, method="euclidean")
i.mds <- metaMDS(i.dist, try=50)

quartz()
plot(i.mds, display="sites",type="n",
xlab="Axis 1", ylab="Axis 2", main="Iris species")
# add color
iris$Species
iris$col<-c(rep("red",50),rep("green",50),rep("blue",50))
points(i.mds, display="sites",
    pch=21, cex=1,
    bg=iris$col
    )
text(x=-2.5, y=3,paste("Stress=",round(i.mds$stress,4)))
legend("topright",
    legend=unique(iris$Species),
    pch=21,
    pt.bg=unique(iris$col)
)


############
# perMANOVA
############

# function "adonis" performs perMANOVA:
# ?adonis
i.obs <- i
i.spp <- data.frame(species=iris[,5])
head(i.obs)
head(i.spp)
d.manova <- adonis(i.obs ~ species, method = "euclidean", data= i.spp)
d.manova

# Post-hoc pairwise comparisons:
library(devtools)
#install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
library(pairwiseAdonis)
#?pairwise.adonis
iris.adonis.pw <- pairwise.adonis(x = i.obs,
                    factors= i.spp$species,
                    sim.method='euclidean',
                    p.adjust.m='holm')
                    
iris.adonis.pw <- as.data.frame(iris.adonis.pw)
iris.adonis.pw$F.Model <- round(iris.adonis.pw$F.Model, 2)
iris.adonis.pw$R2 <- round(iris.adonis.pw$R2, 2)